Compare commits

...

13 Commits

Author SHA1 Message Date
Value Raider
88525abcbd Bump version to 0.2.29 2023-09-22 12:00:47 +01:00
ValueRaider
99ef055cc4 Merge pull request #1692 from ranaroussi/dev
sync dev -> main
2023-09-22 12:00:06 +01:00
ValueRaider
0f36f7980b Merge pull request #1688 from ranaroussi/fix/price-repair
Price repair fixes
2023-09-22 11:27:38 +01:00
ValueRaider
8282af9ce4 Merge pull request #1684 from ranaroussi/fix/prices-intraday-merge-events
Fix merging events with intraday prices
2023-09-22 11:24:30 +01:00
Value Raider
5208c8cf05 Price repair improvements
Price repair improvements:
- don't attempt repair of empty prices table
- random-mixups: fix 0.01x errors, not just 100x
- stop zeroes, big-dividends, and 100x errors triggering false split errors
2023-09-22 11:21:17 +01:00
Value Raider
d3dfb4c6a8 Fix merging events with intraday prices
If Yahoo returns intraday price data with dividend or stock-split event in future, then this broke the merge.
Fix is to discard out-of-range events.
Assumes that if user requesting intraday then they aren't interested in events.
2023-09-19 19:35:03 +01:00
ValueRaider
279726afe4 Merge pull request #1687 from arduinocc04/arduinocc04-patch-2
Fix error when calling enable_debug_mode twice
2023-09-19 17:41:28 +01:00
조다니엘(Daniel Cho)
937386f3ef Fix error when calling enable_debug_mode twice 2023-09-19 10:02:28 +09:00
ValueRaider
32e569f652 Merge pull request #1675 from ranaroussi/hotfix/database-error
Replace sqlite3 with peewee for 100% thread-safety
2023-09-17 21:47:57 +01:00
ValueRaider
de59f0b2c6 Bug template - add section to describe bug 2023-09-09 18:32:32 +01:00
Value Raider
7d6d8562e8 Replace sqlite3 with peewee for 100% thread-safety 2023-09-03 16:47:36 +01:00
ValueRaider
6cae6d45b1 Merge pull request #1672 from difelice/fix-fix_yahoo_returning_live_separate-warnings
Fix pandas warning when retrieving quotes.
2023-09-01 22:07:18 +01:00
Alessandro Di Felice
ec3de0710d Fix pandas warning when retrieving quotes 2023-09-01 14:07:53 -05:00
10 changed files with 257 additions and 211 deletions

View File

@@ -42,6 +42,13 @@ body:
Provide the following as best you can:
- type: textarea
id: summary
attributes:
label: "Describe bug"
validations:
required: true
- type: textarea
id: code
attributes:

View File

@@ -1,6 +1,14 @@
Change Log
===========
0.2.29
------
- Fix pandas warning when retrieving quotes. #1672
- Replace sqlite3 with peewee for 100% thread-safety #1675
- Fix merging events with intraday prices #1684
- Fix error when calling enable_debug_mode twice #1687
- Price repair fixes #1688
0.2.28
------
- Fix TypeError: 'FastInfo' object is not callable #1636

View File

@@ -258,6 +258,7 @@ To install `yfinance` using `conda`, see
- [frozendict](https://pypi.org/project/frozendict) \>= 2.3.4
- [beautifulsoup4](https://pypi.org/project/beautifulsoup4) \>= 4.11.1
- [html5lib](https://pypi.org/project/html5lib) \>= 1.1
- [peewee](https://pypi.org/project/peewee) \>= 3.16.2
#### Optional (if you want to use `pandas_datareader`)

View File

@@ -1,5 +1,5 @@
{% set name = "yfinance" %}
{% set version = "0.2.28" %}
{% set version = "0.2.29" %}
package:
name: "{{ name|lower }}"
@@ -26,6 +26,7 @@ requirements:
- frozendict >=2.3.4
- beautifulsoup4 >=4.11.1
- html5lib >=1.1
- peewee >=3.16.2
# - pycryptodome >=3.6.6
- pip
- python
@@ -41,6 +42,7 @@ requirements:
- frozendict >=2.3.4
- beautifulsoup4 >=4.11.1
- html5lib >=1.1
- peewee >=3.16.2
# - pycryptodome >=3.6.6
- python

View File

@@ -8,3 +8,4 @@ pytz>=2022.5
frozendict>=2.3.4
beautifulsoup4>=4.11.1
html5lib>=1.1
peewee>=3.16.2

View File

@@ -62,7 +62,7 @@ setup(
install_requires=['pandas>=1.3.0', 'numpy>=1.16.5',
'requests>=2.31', 'multitasking>=0.0.7',
'lxml>=4.9.1', 'appdirs>=1.4.4', 'pytz>=2022.5',
'frozendict>=2.3.4',
'frozendict>=2.3.4', 'peewee>=3.16.2',
'beautifulsoup4>=4.11.1', 'html5lib>=1.1'],
# Note: Pandas.read_html() needs html5lib & beautifulsoup4
entry_points={

View File

@@ -114,6 +114,43 @@ class TestPriceHistory(unittest.TestCase):
if not test_run:
self.skipTest("Skipping test_duplicatingWeekly() because not possible to fail Monday/weekend")
def test_pricesEventsMerge(self):
# Test case: dividend occurs after last row in price data
tkr = 'INTC'
start_d = _dt.date(2022, 1, 1)
end_d = _dt.date(2023, 1, 1)
df = yf.Ticker(tkr, session=self.session).history(interval='1d', start=start_d, end=end_d)
div = 1.0
future_div_dt = df.index[-1] + _dt.timedelta(days=1)
if future_div_dt.weekday() in [5, 6]:
future_div_dt += _dt.timedelta(days=1) * (7 - future_div_dt.weekday())
divs = _pd.DataFrame(data={"Dividends":[div]}, index=[future_div_dt])
df2 = yf.utils.safe_merge_dfs(df.drop(['Dividends', 'Stock Splits'], axis=1), divs, '1d')
self.assertIn(future_div_dt, df2.index)
self.assertIn("Dividends", df2.columns)
self.assertEqual(df2['Dividends'].iloc[-1], div)
def test_pricesEventsMerge_bug(self):
# Reproduce exception when merging intraday prices with future dividend
tkr = 'S32.AX'
interval = '30m'
df_index = []
d = 13
for h in range(0, 16):
for m in [0, 30]:
df_index.append(_dt.datetime(2023, 9, d, h, m))
df_index.append(_dt.datetime(2023, 9, d, 16))
df = _pd.DataFrame(index=df_index)
df.index = _pd.to_datetime(df.index)
df['Close'] = 1.0
div = 1.0
future_div_dt = _dt.datetime(2023, 9, 14, 10)
divs = _pd.DataFrame(data={"Dividends":[div]}, index=[future_div_dt])
df2 = yf.utils.safe_merge_dfs(df, divs, interval)
# No exception = test pass
def test_intraDayWithEvents(self):
tkrs = ["BHP.AX", "IMP.JO", "BP.L", "PNL.L", "INTC"]
test_run = False

View File

@@ -829,6 +829,8 @@ class TickerBase:
@utils.log_indent_decorator
def _fix_unit_mixups(self, df, interval, tz_exchange, prepost):
if df.empty:
return df
df2 = self._fix_unit_switch(df, interval, tz_exchange)
df3 = self._fix_unit_random_mixups(df2, interval, tz_exchange, prepost)
return df3
@@ -842,6 +844,9 @@ class TickerBase:
# - a sudden switch between $<->cents at some date
# This function fixes the first.
if df.empty:
return df
# Easy to detect and fix, just look for outliers = ~100x local median
logger = utils.get_yf_logger()
@@ -885,7 +890,11 @@ class TickerBase:
ratio = df2_data / median
ratio_rounded = (ratio / 20).round() * 20 # round ratio to nearest 20
f = ratio_rounded == 100
if not f.any():
ratio_rcp = 1.0/ratio
ratio_rcp_rounded = (ratio_rcp / 20).round() * 20 # round ratio to nearest 20
f_rcp = (ratio_rounded == 100) | (ratio_rcp_rounded == 100)
f_either = f | f_rcp
if not f_either.any():
logger.info("price-repair-100x: No sporadic 100x errors")
if "Repaired?" not in df.columns:
df["Repaired?"] = False
@@ -894,7 +903,7 @@ class TickerBase:
# Mark values to send for repair
tag = -1.0
for i in range(len(data_cols)):
fi = f[:, i]
fi = f_either[:, i]
c = data_cols[i]
df2.loc[fi, c] = tag
@@ -906,35 +915,43 @@ class TickerBase:
if n_after > 0:
# This second pass will *crudely* "fix" any remaining errors in High/Low
# simply by ensuring they don't contradict e.g. Low = 100x High.
f = df2_tagged
f = (df2[data_cols].to_numpy() == tag) & f
for i in range(f.shape[0]):
fi = f[i, :]
if not fi.any():
continue
idx = df2.index[i]
c = "Open"
j = data_cols.index(c)
if fi[j]:
df2.loc[idx, c] = df.loc[idx, c] * 0.01
#
c = "Close"
j = data_cols.index(c)
if fi[j]:
df2.loc[idx, c] = df.loc[idx, c] * 0.01
#
c = "Adj Close"
j = data_cols.index(c)
if fi[j]:
df2.loc[idx, c] = df.loc[idx, c] * 0.01
#
c = "High"
j = data_cols.index(c)
for c in ['Open', 'Close']:
j = data_cols.index(c)
if fi[j]:
df2.loc[idx, c] = df.loc[idx, c] * 0.01
c = "High" ; j = data_cols.index(c)
if fi[j]:
df2.loc[idx, c] = df2.loc[idx, ["Open", "Close"]].max()
#
c = "Low"
j = data_cols.index(c)
c = "Low" ; j = data_cols.index(c)
if fi[j]:
df2.loc[idx, c] = df2.loc[idx, ["Open", "Close"]].min()
f_rcp = (df2[data_cols].to_numpy() == tag) & f_rcp
for i in range(f_rcp.shape[0]):
fi = f_rcp[i, :]
if not fi.any():
continue
idx = df2.index[i]
for c in ['Open', 'Close']:
j = data_cols.index(c)
if fi[j]:
df2.loc[idx, c] = df.loc[idx, c] * 100.0
c = "High" ; j = data_cols.index(c)
if fi[j]:
df2.loc[idx, c] = df2.loc[idx, ["Open", "Close"]].max()
c = "Low" ; j = data_cols.index(c)
if fi[j]:
df2.loc[idx, c] = df2.loc[idx, ["Open", "Close"]].min()
@@ -953,9 +970,9 @@ class TickerBase:
logger.info('price-repair-100x: ' + report_msg)
# Restore original values where repair failed
f = df2_tagged
f_either = df2[data_cols].to_numpy() == tag
for j in range(len(data_cols)):
fj = f[:, j]
fj = f_either[:, j]
if fj.any():
c = data_cols[j]
df2.loc[fj, c] = df.loc[fj, c]
@@ -977,14 +994,6 @@ class TickerBase:
# This function fixes the second.
# Eventually Yahoo fixes but could take them 2 weeks.
# To detect, use 'bad split adjustment' algorithm. But only correct
# if no stock splits in data
f_splits = df['Stock Splits'].to_numpy() != 0.0
if f_splits.any():
utils.get_yf_logger().debug('price-repair-100x: Cannot check for chunked 100x errors because splits present')
return df
return self._fix_prices_sudden_change(df, interval, tz_exchange, 100.0)
@utils.log_indent_decorator
@@ -993,6 +1002,9 @@ class TickerBase:
# But most times when prices=0 or NaN returned is because no trades.
# Impossible to distinguish, so only attempt repair if few or rare.
if df.empty:
return df
logger = utils.get_yf_logger()
if df.shape[0] == 0:
@@ -1101,6 +1113,9 @@ class TickerBase:
# Easy to detect and correct BUT ONLY IF the data 'df' includes today's dividend.
# E.g. if fetching historic prices before todays dividend, then cannot fix.
if df.empty:
return df
logger = utils.get_yf_logger()
if df is None or df.empty:
@@ -1173,6 +1188,9 @@ class TickerBase:
# which direction to reverse adjustment - have to analyse prices and detect.
# Not difficult.
if df.empty:
return df
logger = utils.get_yf_logger()
interday = interval in ['1d', '1wk', '1mo', '3mo']
@@ -1198,6 +1216,9 @@ class TickerBase:
@utils.log_indent_decorator
def _fix_prices_sudden_change(self, df, interval, tz_exchange, change, correct_volume=False):
if df.empty:
return df
logger = utils.get_yf_logger()
df = df.sort_index(ascending=False)
@@ -1262,11 +1283,25 @@ class TickerBase:
# Avoid using 'Low' and 'High'. For multiday intervals, these can be
# very volatile so reduce ability to detect genuine stock split errors
_1d_change_x = np.full((n, 2), 1.0)
price_data = df2[['Open','Close']].replace(0.0, 1.0).to_numpy()
price_data = df2[['Open','Close']].to_numpy()
f_zero = price_data == 0.0
else:
_1d_change_x = np.full((n, 4), 1.0)
price_data = df2[OHLC].replace(0.0, 1.0).to_numpy()
price_data = df2[OHLC].to_numpy()
f_zero = price_data == 0.0
if f_zero.any():
price_data[f_zero] = 1.0
# Update: if a VERY large dividend is paid out, then can be mistaken for a 1:2 stock split.
# Fix = use adjusted prices
adj = df2['Adj Close'].to_numpy() / df2['Close'].to_numpy()
for j in range(price_data.shape[1]):
price_data[:,j] *= adj
_1d_change_x[1:] = price_data[1:, ] / price_data[:-1, ]
f_zero_num_denom = f_zero | np.roll(f_zero, 1, axis=0)
if f_zero_num_denom.any():
_1d_change_x[f_zero_num_denom] = 1.0
if interday and interval != '1d':
# average change
_1d_change_minx = np.average(_1d_change_x, axis=1)
@@ -1365,6 +1400,29 @@ class TickerBase:
logger.info(f'price-repair-split: No {fix_type}s detected')
return df
# Update: if any 100x changes are soon after a stock split, so could be confused with split error, then abort
threshold_days = 30
f_splits = df['Stock Splits'].to_numpy() != 0.0
if change in [100.0, 0.01] and f_splits.any():
indices_A = np.where(f_splits)[0]
indices_B = np.where(f)[0]
if not len(indices_A) or not len(indices_B):
return None
gaps = indices_B[:, None] - indices_A
# Because data is sorted in DEscending order, need to flip gaps
gaps *= -1
f_pos = gaps > 0
if f_pos.any():
gap_min = gaps[f_pos].min()
gap_td = utils._interval_to_timedelta(interval) * gap_min
if isinstance(gap_td, _dateutil.relativedelta.relativedelta):
threshold = _dateutil.relativedelta.relativedelta(days=threshold_days)
else:
threshold = _datetime.timedelta(days=threshold_days)
if gap_td < threshold:
logger.info(f'price-repair-split: 100x changes are too soon after stock split events, aborting')
return df
# if logger.isEnabledFor(logging.DEBUG):
# df_debug['i'] = list(range(0, df_debug.shape[0]))
# df_debug['i_rev'] = df_debug.shape[0]-1 - df_debug['i']

View File

@@ -27,7 +27,7 @@ import datetime as _datetime
import logging
import os as _os
import re as _re
import sqlite3 as _sqlite3
import peewee as _peewee
import sys as _sys
import threading
from functools import lru_cache
@@ -169,14 +169,15 @@ def setup_debug_formatting():
yf_logger.warning("logging mode not set to 'DEBUG', so not setting up debug formatting")
return
if yf_logger.handlers is None or len(yf_logger.handlers) == 0:
h = logging.StreamHandler()
# Ensure different level strings don't interfere with indentation
formatter = MultiLineFormatter(fmt='%(levelname)-8s %(message)s')
h.setFormatter(formatter)
yf_logger.addHandler(h)
global yf_log_indented
if not yf_log_indented:
if yf_logger.handlers is None or len(yf_logger.handlers) == 0:
h = logging.StreamHandler()
# Ensure different level strings don't interfere with indentation
formatter = MultiLineFormatter(fmt='%(levelname)-8s %(message)s')
h.setFormatter(formatter)
yf_logger.addHandler(h)
yf_log_indented = True
@@ -628,30 +629,32 @@ def fix_Yahoo_returning_live_separate(quotes, interval, tz_exchange):
# Yahoo is not returning live data (phew!)
return quotes
if _np.isnan(quotes.loc[idx2, "Open"]):
quotes.loc[idx2, "Open"] = quotes["Open"][n - 1]
quotes.loc[idx2, "Open"] = quotes["Open"].iloc[n - 1]
# Note: nanmax() & nanmin() ignores NaNs, but still need to check not all are NaN to avoid warnings
if not _np.isnan(quotes["High"][n - 1]):
quotes.loc[idx2, "High"] = _np.nanmax([quotes["High"][n - 1], quotes["High"][n - 2]])
if not _np.isnan(quotes["High"].iloc[n - 1]):
quotes.loc[idx2, "High"] = _np.nanmax([quotes["High"].iloc[n - 1], quotes["High"].iloc[n - 2]])
if "Adj High" in quotes.columns:
quotes.loc[idx2, "Adj High"] = _np.nanmax([quotes["Adj High"][n - 1], quotes["Adj High"][n - 2]])
quotes.loc[idx2, "Adj High"] = _np.nanmax([quotes["Adj High"].iloc[n - 1], quotes["Adj High"].iloc[n - 2]])
if not _np.isnan(quotes["Low"][n - 1]):
quotes.loc[idx2, "Low"] = _np.nanmin([quotes["Low"][n - 1], quotes["Low"][n - 2]])
if not _np.isnan(quotes["Low"].iloc[n - 1]):
quotes.loc[idx2, "Low"] = _np.nanmin([quotes["Low"].iloc[n - 1], quotes["Low"].iloc[n - 2]])
if "Adj Low" in quotes.columns:
quotes.loc[idx2, "Adj Low"] = _np.nanmin([quotes["Adj Low"][n - 1], quotes["Adj Low"][n - 2]])
quotes.loc[idx2, "Adj Low"] = _np.nanmin([quotes["Adj Low"].iloc[n - 1], quotes["Adj Low"].iloc[n - 2]])
quotes.loc[idx2, "Close"] = quotes["Close"][n - 1]
quotes.loc[idx2, "Close"] = quotes["Close"].iloc[n - 1]
if "Adj Close" in quotes.columns:
quotes.loc[idx2, "Adj Close"] = quotes["Adj Close"][n - 1]
quotes.loc[idx2, "Volume"] += quotes["Volume"][n - 1]
quotes.loc[idx2, "Adj Close"] = quotes["Adj Close"].iloc[n - 1]
quotes.loc[idx2, "Volume"] += quotes["Volume"].iloc[n - 1]
quotes = quotes.drop(quotes.index[n - 1])
return quotes
def safe_merge_dfs(df_main, df_sub, interval):
if df_sub.shape[0] == 0:
if df_sub.empty:
raise Exception("No data to merge")
if df_main.empty:
return df_main
df_sub_backup = df_sub.copy()
data_cols = [c for c in df_sub.columns if c not in df_main]
@@ -675,47 +678,54 @@ def safe_merge_dfs(df_main, df_sub, interval):
else:
indices = _np.searchsorted(_np.append(df_main.index, df_main.index[-1] + td), df_sub.index, side='right')
indices -= 1 # Convert from [[i-1], [i]) to [[i], [i+1])
# Numpy.searchsorted does not handle out-of-range well, so handle manually:
for i in range(len(df_sub.index)):
dt = df_sub.index[i]
if dt < df_main.index[0] or dt >= df_main.index[-1] + td:
# Out-of-range
indices[i] = -1
# Numpy.searchsorted does not handle out-of-range well, so handle manually:
for i in range(len(df_sub.index)):
dt = df_sub.index[i]
if dt < df_main.index[0] or dt >= df_main.index[-1] + td:
# Out-of-range
indices[i] = -1
f_outOfRange = indices == -1
if f_outOfRange.any() and not intraday:
empty_row_data = {c:[_np.nan] for c in const.price_colnames}|{'Volume':[0]}
if interval == '1d':
# For 1d, add all out-of-range event dates
for i in _np.where(f_outOfRange)[0]:
dt = df_sub.index[i]
get_yf_logger().debug(f"Adding out-of-range {data_col} @ {dt.date()} in new prices row of NaNs")
empty_row = _pd.DataFrame(data=empty_row_data, index=[dt])
df_main = _pd.concat([df_main, empty_row], sort=True)
if f_outOfRange.any():
if intraday:
# Discard out-of-range dividends in intraday data, assume user not interested
df_sub = df_sub[~f_outOfRange]
if df_sub.empty:
df_main['Dividends'] = 0.0
return df_main
else:
# Else, only add out-of-range event dates if occurring in interval
# immediately after last pricfe row
last_dt = df_main.index[-1]
next_interval_start_dt = last_dt + td
next_interval_end_dt = next_interval_start_dt + td
for i in _np.where(f_outOfRange)[0]:
dt = df_sub.index[i]
if next_interval_start_dt <= dt < next_interval_end_dt:
new_dt = next_interval_start_dt
empty_row_data = {c:[_np.nan] for c in const.price_colnames}|{'Volume':[0]}
if interval == '1d':
# For 1d, add all out-of-range event dates
for i in _np.where(f_outOfRange)[0]:
dt = df_sub.index[i]
get_yf_logger().debug(f"Adding out-of-range {data_col} @ {dt.date()} in new prices row of NaNs")
empty_row = _pd.DataFrame(data=empty_row_data, index=[dt])
df_main = _pd.concat([df_main, empty_row], sort=True)
df_main = df_main.sort_index()
else:
# Else, only add out-of-range event dates if occurring in interval
# immediately after last price row
last_dt = df_main.index[-1]
next_interval_start_dt = last_dt + td
next_interval_end_dt = next_interval_start_dt + td
for i in _np.where(f_outOfRange)[0]:
dt = df_sub.index[i]
if next_interval_start_dt <= dt < next_interval_end_dt:
new_dt = next_interval_start_dt
get_yf_logger().debug(f"Adding out-of-range {data_col} @ {dt.date()} in new prices row of NaNs")
empty_row = _pd.DataFrame(data=empty_row_data, index=[dt])
df_main = _pd.concat([df_main, empty_row], sort=True)
df_main = df_main.sort_index()
# Re-calculate indices
indices = _np.searchsorted(_np.append(df_main.index, df_main.index[-1] + td), df_sub.index, side='right')
indices -= 1 # Convert from [[i-1], [i]) to [[i], [i+1])
# Numpy.searchsorted does not handle out-of-range well, so handle manually:
for i in range(len(df_sub.index)):
dt = df_sub.index[i]
if dt < df_main.index[0] or dt >= df_main.index[-1] + td:
# Out-of-range
indices[i] = -1
# Re-calculate indices
indices = _np.searchsorted(_np.append(df_main.index, df_main.index[-1] + td), df_sub.index, side='right')
indices -= 1 # Convert from [[i-1], [i]) to [[i], [i+1])
# Numpy.searchsorted does not handle out-of-range well, so handle manually:
for i in range(len(df_sub.index)):
dt = df_sub.index[i]
if dt < df_main.index[0] or dt >= df_main.index[-1] + td:
# Out-of-range
indices[i] = -1
f_outOfRange = indices == -1
if f_outOfRange.any():
@@ -891,136 +901,60 @@ class ProgressBar:
# TimeZone cache related code
# ---------------------------------
class _KVStore:
"""Simple Sqlite backed key/value store, key and value are strings. Should be thread safe."""
def __init__(self, filename):
self._cache_mutex = Lock()
with self._cache_mutex:
self.conn = _sqlite3.connect(filename, timeout=10, check_same_thread=False)
self.conn.execute('pragma journal_mode=wal')
try:
self.conn.execute('create table if not exists "kv" (key TEXT primary key, value TEXT) without rowid')
except Exception as e:
if 'near "without": syntax error' in str(e):
# "without rowid" requires sqlite 3.8.2. Older versions will raise exception
self.conn.execute('create table if not exists "kv" (key TEXT primary key, value TEXT)')
else:
raise
self.conn.commit()
_atexit.register(self.close)
def close(self):
if self.conn is not None:
with self._cache_mutex:
self.conn.close()
self.conn = None
def get(self, key: str) -> Union[str, None]:
"""Get value for key if it exists else returns None"""
try:
item = self.conn.execute('select value from "kv" where key=?', (key,))
except _sqlite3.IntegrityError as e:
self.delete(key)
return None
if item:
return next(item, (None,))[0]
def set(self, key: str, value: str) -> None:
if value is None:
self.delete(key)
else:
with self._cache_mutex:
self.conn.execute('replace into "kv" (key, value) values (?,?)', (key, value))
self.conn.commit()
def bulk_set(self, kvdata: Dict[str, str]):
records = tuple(i for i in kvdata.items())
with self._cache_mutex:
self.conn.executemany('replace into "kv" (key, value) values (?,?)', records)
self.conn.commit()
def delete(self, key: str):
with self._cache_mutex:
self.conn.execute('delete from "kv" where key=?', (key,))
self.conn.commit()
_cache_dir = _os.path.join(_ad.user_cache_dir(), "py-yfinance")
DB_PATH = _os.path.join(_cache_dir, 'tkr-tz.db')
db = _peewee.SqliteDatabase(DB_PATH, pragmas={'journal_mode': 'wal', 'cache_size': -64})
_tz_cache = None
class _TzCacheException(Exception):
pass
class KV(_peewee.Model):
key = _peewee.CharField(primary_key=True)
value = _peewee.CharField(null=True)
class Meta:
database = db
without_rowid = True
class _TzCache:
"""Simple sqlite file cache of ticker->timezone"""
def __init__(self):
self._setup_cache_folder()
# Must init db here, where is thread-safe
db.connect()
db.create_tables([KV])
old_cache_file_path = _os.path.join(_cache_dir, "tkr-tz.csv")
if _os.path.isfile(old_cache_file_path):
_os.remove(old_cache_file_path)
def lookup(self, key):
try:
self._tz_db = _KVStore(_os.path.join(self._db_dir, "tkr-tz.db"))
except _sqlite3.DatabaseError as err:
raise _TzCacheException(f"Error creating TzCache folder: '{self._db_dir}' reason: {err}")
self._migrate_cache_tkr_tz()
def _setup_cache_folder(self):
if not _os.path.isdir(self._db_dir):
try:
_os.makedirs(self._db_dir)
except OSError as err:
raise _TzCacheException(f"Error creating TzCache folder: '{self._db_dir}' reason: {err}")
elif not (_os.access(self._db_dir, _os.R_OK) and _os.access(self._db_dir, _os.W_OK)):
raise _TzCacheException(f"Cannot read and write in TzCache folder: '{self._db_dir}'")
def lookup(self, tkr):
return self.tz_db.get(tkr)
def store(self, tkr, tz):
if tz is None:
self.tz_db.delete(tkr)
else:
tz_db = self.tz_db.get(tkr)
if tz_db is not None:
if tz != tz_db:
get_yf_logger().debug(f'{tkr}: Overwriting cached TZ "{tz_db}" with different TZ "{tz}"')
self.tz_db.set(tkr, tz)
else:
self.tz_db.set(tkr, tz)
@property
def _db_dir(self):
global _cache_dir
return _os.path.join(_cache_dir, "py-yfinance")
@property
def tz_db(self):
return self._tz_db
def _migrate_cache_tkr_tz(self):
"""Migrate contents from old ticker CSV-cache to SQLite db"""
old_cache_file_path = _os.path.join(self._db_dir, "tkr-tz.csv")
if not _os.path.isfile(old_cache_file_path):
return KV.get(KV.key == key).value
except KV.DoesNotExist:
return None
try:
df = _pd.read_csv(old_cache_file_path, index_col="Ticker", on_bad_lines="skip")
except _pd.errors.EmptyDataError:
_os.remove(old_cache_file_path)
except TypeError:
_os.remove(old_cache_file_path)
else:
# Discard corrupt data:
df = df[~df["Tz"].isna().to_numpy()]
df = df[~(df["Tz"] == '').to_numpy()]
df = df[~df.index.isna()]
if not df.empty:
try:
self.tz_db.bulk_set(df.to_dict()['Tz'])
except Exception as e:
# Ignore
pass
_os.remove(old_cache_file_path)
def store(self, key, value):
try:
if value is None:
q = KV.delete().where(KV.key == key)
q.execute()
return
with db.atomic():
KV.insert(key=key, value=value).execute()
except IntegrityError:
# Integrity error means the key already exists. Try updating the key.
old_value = self.lookup(key)
if old_value != value:
get_yf_logger().debug(f"Value for key {key} changed from {old_value} to {value}.")
with db.atomic():
q = KV.update(value=value).where(KV.key == key)
q.execute()
def close(self):
db.close()
class _TzCacheDummy:
@@ -1058,9 +992,7 @@ def get_tz_cache():
return _tz_cache
_cache_dir = _ad.user_cache_dir()
_cache_init_lock = Lock()
_tz_cache = None
def set_tz_cache_location(cache_dir: str):

View File

@@ -1 +1 @@
version = "0.2.28"
version = "0.2.29"