Compare commits

...

20 Commits
main ... 0.1.86

Author SHA1 Message Date
ValueRaider
cdae1cf226 Bump version to 0.1.86 2022-11-14 12:49:43 +00:00
ValueRaider
bca569318e Merge pull request #1170 from ranaroussi/patch/default-start
Backport #1169 (default start)
2022-11-13 11:52:51 +00:00
ValueRaider
d11cd85a66 Backport #1169 (default start) 2022-11-13 11:51:41 +00:00
ValueRaider
2d32a6e204 Merge pull request #1162 from ranaroussi/patch/tz-csv-error
Fix corrupt tkr-tz-csv halting code
2022-11-10 21:51:10 +00:00
ValueRaider
1687ae66ab Fix corrupt tkr-tz-csv halting code 2022-11-10 14:19:21 +00:00
ValueRaider
ddc34348d9 Merge pull request #1142 from ranaroussi/patch-0.1/delisted-tkr-errors
Improve handling delisted tickers
2022-11-03 22:56:16 +00:00
ValueRaider
1d74cfeb19 Merge pull request #1141 from ranaroussi/patch-0.1/trailing-peg-ratio
Move get 'trailingPegRatio' into _get_info(), simplify & optimise
2022-11-03 22:55:39 +00:00
ValueRaider
1589d07b56 Move get 'trailingPegRatio' into _get_info(), simplify & optimise 2022-11-03 22:53:04 +00:00
ValueRaider
d261237320 Improve handling delisted tickers 2022-11-03 22:49:12 +00:00
ValueRaider
66af3080dd Bump version to 0.1.85 2022-11-03 19:04:45 +00:00
ValueRaider
9d396b9559 Merge pull request #1135 from ranaroussi/patch/unknown-ticker-timezone
Backport ticker tz verification for nice error
2022-11-02 15:18:26 +00:00
ValueRaider
23b6ad12c1 Backport ticker tz verification for nice error 2022-10-31 21:14:50 +00:00
ValueRaider
22131e9fc7 Merge pull request #1124 from Jossan84/main
Bugfix: Get logo url when no website exists
2022-10-27 22:34:18 +01:00
ValueRaider
e99e61f95a Bump version to 0.1.84 2022-10-26 00:12:29 +01:00
ValueRaider
a3fe95ea27 Make tz-cache thread-safe 2022-10-26 00:09:23 +01:00
ValueRaider
000cb70bcb Bump version to 0.1.83 2022-10-25 23:23:32 +01:00
ValueRaider
c8d9d06e75 Expose _fetch_ticker_tz() arguments 2022-10-25 23:21:56 +01:00
ValueRaider
a5e07a0375 Bump version to 0.1.82 2022-10-25 23:15:48 +01:00
ValueRaider
a0a12bcf4c Backport _fetch_ticker_tz() 2022-10-25 23:07:48 +01:00
Jose Manuel
42e5751705 Bugfix: Get logo url when no website exists 2022-09-19 13:54:56 +02:00
5 changed files with 225 additions and 87 deletions

View File

@@ -1,6 +1,26 @@
 Change Log
 ===========
+0.1.86
+------
+- Fix 'trailingPegRatio' #1141
+- Improve handling delisted tickers #1142
+- Fix corrupt tkr-tz-csv halting code #1162
+- Change default start to 1900-01-01 #1170
+
+0.1.85
+------
+- Fix info['logo_url'] #1062
+- Fix handling delisted tickers #1137
+
+0.1.84
+------
+- Make tz-cache thread-safe
+
+0.1.83
+------
+- Reduce spam-effect of tz-fetch
+
 0.1.81
 ------
 - Fix unhandled tz-cache exception #1107
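In user code the headline changes look like this (illustrative sketch only; example symbols, live network access to Yahoo assumed):

    import yfinance as yf

    # #1170: with no start date (or period="max"), history now reaches back to
    # a 1900-01-01 default instead of 1950, so long-lived listings are not truncated.
    full = yf.Ticker("MSFT").history(period="max")

    # #1142/#1162: a delisted or unknown symbol now prints an error message and
    # returns an empty DataFrame instead of raising.
    gone = yf.Ticker("SDLP").history(period="1mo")
    print(gone.empty)  # True when Yahoo has no data for the symbol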

View File

@@ -15,21 +15,83 @@ Sanity check for most common library uses all working
 import yfinance as yf
 import unittest
 import datetime

-session = None
+import requests_cache ; session = requests_cache.CachedSession("yfinance.cache", expire_after=24*60*60)

 symbols = ['MSFT', 'IWO', 'VFINX', '^GSPC', 'BTC-USD']
-tickers = [yf.Ticker(symbol) for symbol in symbols]
+tickers = [yf.Ticker(symbol, session=session) for symbol in symbols]
+
+delisted_symbols = ["BRK.B", "SDLP"]
+delisted_tickers = [yf.Ticker(symbol, session=session) for symbol in delisted_symbols]

 class TestTicker(unittest.TestCase):
+    def setUp(self):
+        d_today = datetime.date.today()
+        d_today -= datetime.timedelta(days=30)
+        self.start_d = datetime.date(d_today.year, d_today.month, 1)
+
     def test_info_history(self):
-        # always should have info and history for valid symbols
         for ticker in tickers:
+            # always should have info and history for valid symbols
             assert(ticker.info is not None and ticker.info != {})
-            history = ticker.history(period="max")
+            history = ticker.history(period="1mo")
             assert(history.empty is False and history is not None)
+        histories = yf.download(symbols, period="1mo", session=session)
+        assert(histories.empty is False and histories is not None)
+
+        for ticker in tickers:
+            assert(ticker.info is not None and ticker.info != {})
+            history = ticker.history(start=self.start_d)
+            assert(history.empty is False and history is not None)
+        histories = yf.download(symbols, start=self.start_d, session=session)
+        assert(histories.empty is False and histories is not None)
+
+    def test_info_history_nofail(self):
+        # should not throw Exception for delisted tickers, just print a message
+        for ticker in delisted_tickers:
+            history = ticker.history(period="1mo")
+        histories = yf.download(delisted_symbols, period="1mo", session=session)
+        histories = yf.download(delisted_symbols[0], period="1mo", session=session)
+        histories = yf.download(delisted_symbols[1], period="1mo")#, session=session)
+
+        for ticker in delisted_tickers:
+            history = ticker.history(start=self.start_d)
+        histories = yf.download(delisted_symbols, start=self.start_d, session=session)
+        histories = yf.download(delisted_symbols[0], start=self.start_d, session=session)
+        histories = yf.download(delisted_symbols[1], start=self.start_d, session=session)

     def test_attributes(self):
         for ticker in tickers:
             ticker.isin
             ticker.major_holders
             ticker.institutional_holders
             ticker.mutualfund_holders
             ticker.dividends
             ticker.splits
             ticker.actions
             ticker.info
+            ticker.info["trailingPegRatio"]
             ticker.calendar
             ticker.recommendations
             ticker.earnings
             ticker.quarterly_earnings
             ticker.financials
             ticker.quarterly_financials
             ticker.balance_sheet
             ticker.quarterly_balance_sheet
             ticker.cashflow
             ticker.quarterly_cashflow
             ticker.sustainability
             ticker.options
             ticker.news
             ticker.shares
             ticker.earnings_history
             ticker.earnings_dates
+
+    def test_attributes_nofail(self):
+        # should not throw Exception for delisted tickers, just print a message
+        for ticker in delisted_tickers:
+            ticker.isin
+            ticker.major_holders
+            ticker.institutional_holders
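The reworked tests route everything through a shared requests_cache session; the same pattern works outside the test suite (sketch, assuming requests_cache is installed):

    import requests_cache
    import yfinance as yf

    # Cache Yahoo responses on disk for 24h, as the test setup above does,
    # so repeated runs reuse responses instead of re-hitting the API.
    session = requests_cache.CachedSession("yfinance.cache", expire_after=24*60*60)
    msft = yf.Ticker("MSFT", session=session)
    print(msft.history(period="1mo").head())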

View File

@@ -150,17 +150,14 @@ class TickerBase():
         if start or period is None or period.lower() == "max":
             # Check can get TZ. Fail => probably delisted
-            try:
-                tz = self._get_ticker_tz()
-            except KeyError as e:
-                if "exchangeTimezoneName" in str(e):
-                    shared._DFS[self.ticker] = utils.empty_df()
-                    shared._ERRORS[self.ticker] = err_msg
-                    if "many" not in kwargs and debug_mode:
-                        print('- %s: %s' % (self.ticker, err_msg))
-                    return utils.empty_df()
-                else:
-                    raise
+            tz = self._get_ticker_tz(debug_mode, proxy, timeout)
+            if tz is None:
+                # Every valid ticker has a timezone. Missing = problem
+                shared._DFS[self.ticker] = utils.empty_df()
+                shared._ERRORS[self.ticker] = err_msg
+                if "many" not in kwargs and debug_mode:
+                    print('- %s: %s' % (self.ticker, err_msg))
+                return utils.empty_df()

         if end is None:
             end = int(_time.time())
@@ -170,7 +167,8 @@ class TickerBase():
             if interval == "1m":
                 start = end - 604800  # Subtract 7 days
             else:
-                start = -631159200
+                #time stamp of 01/01/1900
+                start = -2208994789
         else:
             start = utils._parse_user_dt(start, tz)

         params = {"period1": start, "period2": end}
@@ -331,23 +329,79 @@ class TickerBase():
     # ------------------------

-    def _get_ticker_tz(self):
+    def _get_ticker_tz(self, debug_mode, proxy, timeout):
         if not self._tz is None:
             return self._tz

         tkr_tz = utils.cache_lookup_tkr_tz(self.ticker)
+        if tkr_tz is not None:
+            invalid_value = not isinstance(tkr_tz, str)
+            if not invalid_value:
+                try:
+                    _tz.timezone(tkr_tz)
+                except:
+                    invalid_value = True
+            if invalid_value:
+                # Clear from cache and force re-fetch
+                utils.cache_store_tkr_tz(self.ticker, None)
+                tkr_tz = None

         if tkr_tz is None:
-            tkr_tz = self.info["exchangeTimezoneName"]
-            # info fetch is relatively slow so cache timezone
-            try:
-                utils.cache_store_tkr_tz(self.ticker, tkr_tz)
-            except PermissionError:
-                # System probably read-only, so cannot cache
-                pass
+            tkr_tz = self._fetch_ticker_tz(debug_mode, proxy, timeout)
+            if tkr_tz is not None:
+                try:
+                    utils.cache_store_tkr_tz(self.ticker, tkr_tz)
+                except PermissionError:
+                    # System probably read-only, so cannot cache
+                    pass

         self._tz = tkr_tz
         return tkr_tz

+    def _fetch_ticker_tz(self, debug_mode, proxy, timeout):
+        # Query Yahoo for basic price data just to get returned timezone
+        params = {"range":"1d", "interval":"1d"}
+
+        # setup proxy in requests format
+        if proxy is not None:
+            if isinstance(proxy, dict) and "https" in proxy:
+                proxy = proxy["https"]
+            proxy = {"https": proxy}
+
+        # Getting data from json
+        url = "{}/v8/finance/chart/{}".format(self._base_url, self.ticker)
+        session = self.session or _requests
+        try:
+            data = session.get(url=url, params=params, proxies=proxy, headers=utils.user_agent_headers, timeout=timeout)
+            data = data.json()
+        except Exception as e:
+            if debug_mode:
+                print("Failed to get ticker '{}' reason: {}".format(self.ticker, e))
+            return None
+        else:
+            error = data.get('chart', {}).get('error', None)
+            if error:
+                # explicit error from yahoo API
+                if debug_mode:
+                    print("Got error from yahoo api for ticker {}, Error: {}".format(self.ticker, error))
+            else:
+                try:
+                    return data["chart"]["result"][0]["meta"]["exchangeTimezoneName"]
+                except Exception as err:
+                    if debug_mode:
+                        print("Could not get exchangeTimezoneName for ticker '{}' reason: {}".format(self.ticker, err))
+                        print("Got response: ")
+                        print("-------------")
+                        print(" {}".format(data))
+                        print("-------------")
+        return None

     def _get_info(self, proxy=None):
         # setup proxy in requests format
         if proxy is not None:
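The probe is easy to reproduce stand-alone. A sketch using the same v8 chart endpoint and JSON path as above (symbol and User-Agent are placeholders; yfinance itself sends utils.user_agent_headers):

    import requests

    # One tiny 1d/1d chart request; only the metadata's timezone field is wanted.
    url = "https://query1.finance.yahoo.com/v8/finance/chart/MSFT"  # example symbol
    params = {"range": "1d", "interval": "1d"}
    headers = {"User-Agent": "Mozilla/5.0"}

    data = requests.get(url, params=params, headers=headers, timeout=10).json()
    if not data.get("chart", {}).get("error"):
        print(data["chart"]["result"][0]["meta"]["exchangeTimezoneName"])  # e.g. America/New_York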
@@ -425,9 +479,12 @@ class TickerBase():
self._info['logo_url'] = ""
try:
domain = self._info['website'].split(
'://')[1].split('/')[0].replace('www.', '')
self._info['logo_url'] = 'https://logo.clearbit.com/%s' % domain
if not 'website' in self._info:
self._info['logo_url'] = 'https://logo.clearbit.com/%s.com' % self._info['shortName'].split(' ')[0].split(',')[0]
else:
domain = self._info['website'].split(
'://')[1].split('/')[0].replace('www.', '')
self._info['logo_url'] = 'https://logo.clearbit.com/%s' % domain
except Exception:
pass
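A contrived illustration of the two branches above (hypothetical info dicts, no network needed):

    # First dict exercises the website branch, second the new shortName fallback.
    for info in ({"website": "https://www.microsoft.com/en-us"},
                 {"shortName": "Berkshire Hathaway Inc."}):
        if not 'website' in info:
            logo = 'https://logo.clearbit.com/%s.com' % info['shortName'].split(' ')[0].split(',')[0]
        else:
            domain = info['website'].split('://')[1].split('/')[0].replace('www.', '')
            logo = 'https://logo.clearbit.com/%s' % domain
        print(logo)
    # https://logo.clearbit.com/microsoft.com
    # https://logo.clearbit.com/Berkshire.com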
@@ -457,6 +514,29 @@ class TickerBase():
         except Exception:
             pass

+        # Complementary key-statistics. For now just want 'trailing PEG ratio'
+        session = self.session or _requests
+        keys = {"trailingPegRatio"}
+        if len(keys)>0:
+            # For just one/few variable is faster to query directly:
+            url = "https://query1.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{}?symbol={}".format(self.ticker, self.ticker)
+            for k in keys:
+                url += "&type="+k
+            # Request 6 months of data
+            url += "&period1={}".format(int((_datetime.datetime.now()-_datetime.timedelta(days=365//2)).timestamp()))
+            url += "&period2={}".format(int((_datetime.datetime.now()+_datetime.timedelta(days=1)).timestamp()))
+
+            json_str = session.get(url=url, proxies=proxy, headers=utils.user_agent_headers).text
+            json_data = _json.loads(json_str)
+            key_stats = json_data["timeseries"]["result"][0]
+            if k not in key_stats:
+                # Yahoo website prints N/A, indicates Yahoo lacks necessary data to calculate
+                v = None
+            else:
+                # Select most recent (last) raw value in list:
+                v = key_stats[k][-1]["reportedValue"]["raw"]
+            self._info[k] = v

     def _get_fundamentals(self, proxy=None):
         def cleanup(data):
             df = _pd.DataFrame(data).drop(columns=['maxAge'])
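User-facing behaviour is unchanged; the ratio still arrives through .info, now via one light timeseries request instead of page scraping (sketch, network access assumed):

    import yfinance as yf

    peg = yf.Ticker("MSFT").info["trailingPegRatio"]
    print(peg)  # a float, or None when Yahoo lacks the data to compute it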
@@ -616,48 +696,6 @@ class TickerBase():
         except Exception:
             pass

-        # Complementary key-statistics (currently fetching the important trailingPegRatio which is the value shown in the website)
-        res = {}
-        try:
-            my_headers = {'user-agent': 'curl/7.55.1', 'accept': 'application/json', 'content-type': 'application/json',
-                          'referer': 'https://finance.yahoo.com/', 'cache-control': 'no-cache', 'connection': 'close'}
-            p = _re.compile(r'root\.App\.main = (.*);')
-            r = _requests.session().get('https://finance.yahoo.com/quote/{}/key-statistics?p={}'.format(self.ticker,
-                                                                                                        self.ticker), headers=my_headers)
-            q_results = {}
-            my_qs_keys = ['pegRatio']  # QuoteSummaryStore
-            # , 'quarterlyPegRatio'] # QuoteTimeSeriesStore
-            my_ts_keys = ['trailingPegRatio']
-            # Complementary key-statistics
-            data = _json.loads(p.findall(r.text)[0])
-            key_stats = data['context']['dispatcher']['stores']['QuoteTimeSeriesStore']
-            q_results.setdefault(self.ticker, [])
-            for i in my_ts_keys:
-                # j=0
-                try:
-                    # res = {i: key_stats['timeSeries'][i][1]['reportedValue']['raw']}
-                    # We need to loop over multiple items, if they exist: 0,1,2,..
-                    zzz = key_stats['timeSeries'][i]
-                    for j in range(len(zzz)):
-                        if key_stats['timeSeries'][i][j]:
-                            res = {i: key_stats['timeSeries']
-                                   [i][j]['reportedValue']['raw']}
-                            q_results[self.ticker].append(res)
-                    # print(res)
-                    # q_results[ticker].append(res)
-                except:
-                    q_results[ticker].append({i: np.nan})
-            res = {'Company': ticker}
-            q_results[ticker].append(res)
-        except Exception:
-            pass
-
-        if 'trailingPegRatio' in res:
-            self._info['trailingPegRatio'] = res['trailingPegRatio']

         self._fundamentals = True

     def get_recommendations(self, proxy=None, as_dict=False, *args, **kwargs):
@@ -803,6 +841,10 @@ class TickerBase():
         self.get_info(proxy=proxy)
         if "shortName" in self._info:
             q = self._info['shortName']

+        if q is None:
+            err_msg = "Cannot map to ISIN code, symbol may be delisted"
+            print('- %s: %s' % (self.ticker, err_msg))
+            return None

         url = 'https://markets.businessinsider.com/ajax/' \
               'SearchController_Suggest?max_results=25&query=%s' \
@@ -901,8 +943,10 @@ class TickerBase():
             dates = _pd.concat([dates, data], axis=0)
             page_offset += page_size

-        if dates is None:
-            raise Exception("No data found, symbol may be delisted")
+        if (dates is None) or dates.shape[0]==0:
+            err_msg = "No earnings dates found, symbol may be delisted"
+            print('- %s: %s' % (self.ticker, err_msg))
+            return None

         dates = dates.reset_index(drop=True)
         # Drop redundant columns
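A symbol with no earnings data now degrades like the other delisted-ticker paths. A sketch, assuming the earnings_dates property exercised by the tests reaches this code path:

    import yfinance as yf

    # "SDLP" is one of the delisted symbols from the test file above.
    dates = yf.Ticker("SDLP").earnings_dates
    print(dates)  # None, after a printed "symbol may be delisted" message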

View File

@@ -31,6 +31,9 @@ import sys as _sys
 import os as _os
 import appdirs as _ad

+from threading import Lock
+mutex = Lock()
+
 try:
     import ujson as _json
 except ImportError:
@@ -332,27 +335,36 @@ def cache_lookup_tkr_tz(tkr):
     if not _os.path.isfile(fp):
         return None

-    df = _pd.read_csv(fp)
-    f = df["Ticker"] == tkr
-    if sum(f) == 0:
-        return None
-    return df["Tz"][f].iloc[0]
+    mutex.acquire()
+    df = _pd.read_csv(fp, index_col="Ticker", on_bad_lines="skip")
+    mutex.release()
+    if tkr in df.index:
+        return df.loc[tkr,"Tz"]
+    else:
+        return None


 def cache_store_tkr_tz(tkr,tz):
-    df = _pd.DataFrame({"Ticker":[tkr], "Tz":[tz]})
     dp = get_cache_dirpath()
+    fp = _os.path.join(dp, "tkr-tz.csv")
+
+    mutex.acquire()
     if not _os.path.isdir(dp):
         _os.makedirs(dp)
-    fp = _os.path.join(dp, "tkr-tz.csv")
-    if not _os.path.isfile(fp):
-        df.to_csv(fp, index=False)
-        return
-
-    df_all = _pd.read_csv(fp)
-    f = df_all["Ticker"]==tkr
-    if sum(f) > 0:
-        raise Exception("Tkr {} tz already in cache".format(tkr))
-    _pd.concat([df_all,df]).to_csv(fp, index=False)
+    if (not _os.path.isfile(fp)) and (tz is not None):
+        df = _pd.DataFrame({"Tz":[tz]}, index=[tkr])
+        df.index.name = "Ticker"
+        df.to_csv(fp)
+    else:
+        df = _pd.read_csv(fp, index_col="Ticker", on_bad_lines="skip")
+        if tz is None:
+            # Delete if in cache:
+            if tkr in df.index:
+                df.drop(tkr).to_csv(fp)
+        else:
+            if tkr in df.index:
+                raise Exception("Tkr {} tz already in cache".format(tkr))
+            df.loc[tkr,"Tz"] = tz
+            df.to_csv(fp)
+    mutex.release()
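This is the classic shared-file race fix: every reader and writer funnels through one module-level Lock. A minimal stand-in illustration (a dict replaces tkr-tz.csv; the diff uses explicit acquire()/release(), the with-block below is equivalent):

    from threading import Lock, Thread

    mutex = Lock()
    cache = {}  # stands in for tkr-tz.csv

    def store(tkr, tz):
        with mutex:  # serialise concurrent writers, as cache_store_tkr_tz now does
            cache[tkr] = tz

    threads = [Thread(target=store, args=("TKR%d" % i, "UTC")) for i in range(8)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    print(len(cache))  # 8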

View File

@@ -1 +1 @@
version = "0.1.81"
version = "0.1.86"