Compare commits

13 Commits

| Author | SHA1 | Date |
|---|---|---|
|  | 88525abcbd |  |
|  | 99ef055cc4 |  |
|  | 0f36f7980b |  |
|  | 8282af9ce4 |  |
|  | 5208c8cf05 |  |
|  | d3dfb4c6a8 |  |
|  | 279726afe4 |  |
|  | 937386f3ef |  |
|  | 32e569f652 |  |
|  | de59f0b2c6 |  |
|  | 7d6d8562e8 |  |
|  | 6cae6d45b1 |  |
|  | ec3de0710d |  |
.github/ISSUE_TEMPLATE/bug_report.yaml (7 changes, vendored)
```diff
@@ -42,6 +42,13 @@ body:
 
         Provide the following as best you can:
 
+  - type: textarea
+    id: summary
+    attributes:
+      label: "Describe bug"
+    validations:
+      required: true
+
   - type: textarea
     id: code
     attributes:
```
```diff
@@ -1,6 +1,14 @@
 Change Log
 ===========
 
+0.2.29
+------
+- Fix pandas warning when retrieving quotes. #1672
+- Replace sqlite3 with peewee for 100% thread-safety #1675
+- Fix merging events with intraday prices #1684
+- Fix error when calling enable_debug_mode twice #1687
+- Price repair fixes #1688
+
 0.2.28
 ------
 - Fix TypeError: 'FastInfo' object is not callable #1636
```
```diff
@@ -258,6 +258,7 @@ To install `yfinance` using `conda`, see
 - [frozendict](https://pypi.org/project/frozendict) >= 2.3.4
 - [beautifulsoup4](https://pypi.org/project/beautifulsoup4) >= 4.11.1
 - [html5lib](https://pypi.org/project/html5lib) >= 1.1
+- [peewee](https://pypi.org/project/peewee) >= 3.16.2
 
 #### Optional (if you want to use `pandas_datareader`)
```
```diff
@@ -1,5 +1,5 @@
 {% set name = "yfinance" %}
-{% set version = "0.2.28" %}
+{% set version = "0.2.29" %}
 
 package:
   name: "{{ name|lower }}"
@@ -26,6 +26,7 @@ requirements:
     - frozendict >=2.3.4
     - beautifulsoup4 >=4.11.1
     - html5lib >=1.1
+    - peewee >=3.16.2
    # - pycryptodome >=3.6.6
     - pip
     - python
@@ -41,6 +42,7 @@ requirements:
     - frozendict >=2.3.4
     - beautifulsoup4 >=4.11.1
     - html5lib >=1.1
+    - peewee >=3.16.2
    # - pycryptodome >=3.6.6
     - python
```
```diff
@@ -8,3 +8,4 @@ pytz>=2022.5
 frozendict>=2.3.4
 beautifulsoup4>=4.11.1
 html5lib>=1.1
+peewee>=3.16.2
```
setup.py (2 changes)
```diff
@@ -62,7 +62,7 @@ setup(
       install_requires=['pandas>=1.3.0', 'numpy>=1.16.5',
                         'requests>=2.31', 'multitasking>=0.0.7',
                         'lxml>=4.9.1', 'appdirs>=1.4.4', 'pytz>=2022.5',
-                        'frozendict>=2.3.4',
+                        'frozendict>=2.3.4', 'peewee>=3.16.2',
                         'beautifulsoup4>=4.11.1', 'html5lib>=1.1'],
       # Note: Pandas.read_html() needs html5lib & beautifulsoup4
       entry_points={
```
```diff
@@ -114,6 +114,43 @@ class TestPriceHistory(unittest.TestCase):
         if not test_run:
             self.skipTest("Skipping test_duplicatingWeekly() because not possible to fail Monday/weekend")
 
+    def test_pricesEventsMerge(self):
+        # Test case: dividend occurs after last row in price data
+        tkr = 'INTC'
+        start_d = _dt.date(2022, 1, 1)
+        end_d = _dt.date(2023, 1, 1)
+        df = yf.Ticker(tkr, session=self.session).history(interval='1d', start=start_d, end=end_d)
+        div = 1.0
+        future_div_dt = df.index[-1] + _dt.timedelta(days=1)
+        if future_div_dt.weekday() in [5, 6]:
+            future_div_dt += _dt.timedelta(days=1) * (7 - future_div_dt.weekday())
+        divs = _pd.DataFrame(data={"Dividends":[div]}, index=[future_div_dt])
+        df2 = yf.utils.safe_merge_dfs(df.drop(['Dividends', 'Stock Splits'], axis=1), divs, '1d')
+        self.assertIn(future_div_dt, df2.index)
+        self.assertIn("Dividends", df2.columns)
+        self.assertEqual(df2['Dividends'].iloc[-1], div)
+
+    def test_pricesEventsMerge_bug(self):
+        # Reproduce exception when merging intraday prices with future dividend
+        tkr = 'S32.AX'
+        interval = '30m'
+        df_index = []
+        d = 13
+        for h in range(0, 16):
+            for m in [0, 30]:
+                df_index.append(_dt.datetime(2023, 9, d, h, m))
+        df_index.append(_dt.datetime(2023, 9, d, 16))
+        df = _pd.DataFrame(index=df_index)
+        df.index = _pd.to_datetime(df.index)
+        df['Close'] = 1.0
+
+        div = 1.0
+        future_div_dt = _dt.datetime(2023, 9, 14, 10)
+        divs = _pd.DataFrame(data={"Dividends":[div]}, index=[future_div_dt])
+
+        df2 = yf.utils.safe_merge_dfs(df, divs, interval)
+        # No exception = test pass
+
     def test_intraDayWithEvents(self):
         tkrs = ["BHP.AX", "IMP.JO", "BP.L", "PNL.L", "INTC"]
         test_run = False
```
yfinance/base.py (128 changes)
```diff
@@ -829,6 +829,8 @@ class TickerBase:
 
     @utils.log_indent_decorator
     def _fix_unit_mixups(self, df, interval, tz_exchange, prepost):
+        if df.empty:
+            return df
         df2 = self._fix_unit_switch(df, interval, tz_exchange)
         df3 = self._fix_unit_random_mixups(df2, interval, tz_exchange, prepost)
         return df3
@@ -842,6 +844,9 @@ class TickerBase:
         # - a sudden switch between $<->cents at some date
         # This function fixes the first.
 
+        if df.empty:
+            return df
+
         # Easy to detect and fix, just look for outliers = ~100x local median
         logger = utils.get_yf_logger()
```
```diff
@@ -885,7 +890,11 @@ class TickerBase:
         ratio = df2_data / median
         ratio_rounded = (ratio / 20).round() * 20  # round ratio to nearest 20
         f = ratio_rounded == 100
-        if not f.any():
+        ratio_rcp = 1.0/ratio
+        ratio_rcp_rounded = (ratio_rcp / 20).round() * 20  # round ratio to nearest 20
+        f_rcp = (ratio_rounded == 100) | (ratio_rcp_rounded == 100)
+        f_either = f | f_rcp
+        if not f_either.any():
             logger.info("price-repair-100x: No sporadic 100x errors")
             if "Repaired?" not in df.columns:
                 df["Repaired?"] = False
```
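The detection rule in this hunk is simple enough to demo in isolation. Below is a minimal sketch of the same arithmetic (plain numpy with made-up prices, not yfinance's code): values roughly 100x the local median snap to a rounded ratio of 100, and the new reciprocal check catches values that are roughly 100x too *small*.

```python
import numpy as np

prices = np.array([10.2, 10.4, 1040.0, 10.3, 0.101, 10.5])
median = np.median(prices)                   # ~10.35, the "local" level

ratio = prices / median                      # ~1 for good rows, ~100 or ~0.01 for bad
ratio_rounded = (ratio / 20).round() * 20    # snap to nearest 20 -> 100 for 100x errors
f = ratio_rounded == 100                     # rows that are 100x too big

ratio_rcp_rounded = (1.0 / ratio / 20).round() * 20
f_rcp = ratio_rcp_rounded == 100             # rows that are 100x too small

print(np.where(f)[0], np.where(f_rcp)[0])    # -> [2] [4]
```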
```diff
@@ -894,7 +903,7 @@ class TickerBase:
         # Mark values to send for repair
         tag = -1.0
         for i in range(len(data_cols)):
-            fi = f[:, i]
+            fi = f_either[:, i]
             c = data_cols[i]
             df2.loc[fi, c] = tag
```
```diff
@@ -906,35 +915,43 @@ class TickerBase:
         if n_after > 0:
             # This second pass will *crudely* "fix" any remaining errors in High/Low
             # simply by ensuring they don't contradict e.g. Low = 100x High.
-            f = df2_tagged
+            f = (df2[data_cols].to_numpy() == tag) & f
             for i in range(f.shape[0]):
                 fi = f[i, :]
                 if not fi.any():
                     continue
                 idx = df2.index[i]
 
-                c = "Open"
-                j = data_cols.index(c)
-                if fi[j]:
-                    df2.loc[idx, c] = df.loc[idx, c] * 0.01
-                #
-                c = "Close"
-                j = data_cols.index(c)
-                if fi[j]:
-                    df2.loc[idx, c] = df.loc[idx, c] * 0.01
-                #
-                c = "Adj Close"
-                j = data_cols.index(c)
-                if fi[j]:
-                    df2.loc[idx, c] = df.loc[idx, c] * 0.01
-                #
-                c = "High"
-                j = data_cols.index(c)
+                for c in ['Open', 'Close']:
+                    j = data_cols.index(c)
+                    if fi[j]:
+                        df2.loc[idx, c] = df.loc[idx, c] * 0.01
+
+                c = "High" ; j = data_cols.index(c)
                 if fi[j]:
                     df2.loc[idx, c] = df2.loc[idx, ["Open", "Close"]].max()
-                #
-                c = "Low"
-                j = data_cols.index(c)
+
+                c = "Low" ; j = data_cols.index(c)
                 if fi[j]:
                     df2.loc[idx, c] = df2.loc[idx, ["Open", "Close"]].min()
+
+            f_rcp = (df2[data_cols].to_numpy() == tag) & f_rcp
+            for i in range(f_rcp.shape[0]):
+                fi = f_rcp[i, :]
+                if not fi.any():
+                    continue
+                idx = df2.index[i]
+
+                for c in ['Open', 'Close']:
+                    j = data_cols.index(c)
+                    if fi[j]:
+                        df2.loc[idx, c] = df.loc[idx, c] * 100.0
+
+                c = "High" ; j = data_cols.index(c)
+                if fi[j]:
+                    df2.loc[idx, c] = df2.loc[idx, ["Open", "Close"]].max()
+
+                c = "Low" ; j = data_cols.index(c)
+                if fi[j]:
+                    df2.loc[idx, c] = df2.loc[idx, ["Open", "Close"]].min()
```
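The "crude" second pass is easiest to see on one bad row. A toy illustration with made-up numbers (not yfinance's code): once Open and Close have been scaled back down, a High or Low that is still tagged gets clamped to be consistent with them rather than repaired exactly.

```python
import pandas as pd

# One row where every field was stored in cents (100x too big), and the
# first repair pass fixed Open/Close but failed on High/Low:
row = pd.Series({"Open": 10.2, "High": 1070.0, "Low": 1010.0, "Close": 10.5})

# Crude fix: force High/Low to not contradict the repaired Open/Close
row["High"] = max(row["Open"], row["Close"])   # 10.5, not the true high
row["Low"] = min(row["Open"], row["Close"])    # 10.2, not the true low
print(row.to_dict())
```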
```diff
@@ -953,9 +970,9 @@ class TickerBase:
             logger.info('price-repair-100x: ' + report_msg)
 
         # Restore original values where repair failed
-        f = df2_tagged
+        f_either = df2[data_cols].to_numpy() == tag
         for j in range(len(data_cols)):
-            fj = f[:, j]
+            fj = f_either[:, j]
             if fj.any():
                 c = data_cols[j]
                 df2.loc[fj, c] = df.loc[fj, c]
```
```diff
@@ -977,14 +994,6 @@ class TickerBase:
         # This function fixes the second.
         # Eventually Yahoo fixes but could take them 2 weeks.
 
-        # To detect, use 'bad split adjustment' algorithm. But only correct
-        # if no stock splits in data
-
-        f_splits = df['Stock Splits'].to_numpy() != 0.0
-        if f_splits.any():
-            utils.get_yf_logger().debug('price-repair-100x: Cannot check for chunked 100x errors because splits present')
-            return df
-
         return self._fix_prices_sudden_change(df, interval, tz_exchange, 100.0)
 
     @utils.log_indent_decorator
```
```diff
@@ -993,6 +1002,9 @@ class TickerBase:
         # But most times when prices=0 or NaN returned is because no trades.
         # Impossible to distinguish, so only attempt repair if few or rare.
 
+        if df.empty:
+            return df
+
         logger = utils.get_yf_logger()
 
         if df.shape[0] == 0:
@@ -1101,6 +1113,9 @@ class TickerBase:
         # Easy to detect and correct BUT ONLY IF the data 'df' includes today's dividend.
         # E.g. if fetching historic prices before todays dividend, then cannot fix.
 
+        if df.empty:
+            return df
+
         logger = utils.get_yf_logger()
 
         if df is None or df.empty:
@@ -1173,6 +1188,9 @@ class TickerBase:
         # which direction to reverse adjustment - have to analyse prices and detect.
         # Not difficult.
 
+        if df.empty:
+            return df
+
         logger = utils.get_yf_logger()
 
         interday = interval in ['1d', '1wk', '1mo', '3mo']
@@ -1198,6 +1216,9 @@ class TickerBase:
 
     @utils.log_indent_decorator
     def _fix_prices_sudden_change(self, df, interval, tz_exchange, change, correct_volume=False):
+        if df.empty:
+            return df
+
         logger = utils.get_yf_logger()
 
         df = df.sort_index(ascending=False)
```
```diff
@@ -1262,11 +1283,25 @@ class TickerBase:
             # Avoid using 'Low' and 'High'. For multiday intervals, these can be
             # very volatile so reduce ability to detect genuine stock split errors
             _1d_change_x = np.full((n, 2), 1.0)
-            price_data = df2[['Open','Close']].replace(0.0, 1.0).to_numpy()
+            price_data = df2[['Open','Close']].to_numpy()
+            f_zero = price_data == 0.0
         else:
             _1d_change_x = np.full((n, 4), 1.0)
-            price_data = df2[OHLC].replace(0.0, 1.0).to_numpy()
+            price_data = df2[OHLC].to_numpy()
+            f_zero = price_data == 0.0
+        if f_zero.any():
+            price_data[f_zero] = 1.0
+
+        # Update: if a VERY large dividend is paid out, then can be mistaken for a 1:2 stock split.
+        # Fix = use adjusted prices
+        adj = df2['Adj Close'].to_numpy() / df2['Close'].to_numpy()
+        for j in range(price_data.shape[1]):
+            price_data[:,j] *= adj
+
         _1d_change_x[1:] = price_data[1:, ] / price_data[:-1, ]
+        f_zero_num_denom = f_zero | np.roll(f_zero, 1, axis=0)
+        if f_zero_num_denom.any():
+            _1d_change_x[f_zero_num_denom] = 1.0
         if interday and interval != '1d':
             # average change
             _1d_change_minx = np.average(_1d_change_x, axis=1)
```
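To see why the zero-masking matters: a single 0.0 quote would make both the change into and out of that row explode, falsely resembling a split. A standalone sketch of the same masking on made-up prices, sorted ascending for clarity (the real code sorts descending):

```python
import numpy as np

close = np.array([50.0, 0.0, 49.5, 49.8])   # one bad 0.0 quote
f_zero = close == 0.0
close_safe = close.copy()
close_safe[f_zero] = 1.0                     # placeholder so division stays finite

change = np.ones_like(close)
change[1:] = close_safe[1:] / close_safe[:-1]

# Any ratio whose numerator OR denominator was a zero-placeholder is
# meaningless, so neutralise it to 1.0 ("no change"). np.roll wraps around,
# which is harmless here because change[0] is defined as 1.0 anyway:
f_bad = f_zero | np.roll(f_zero, 1)
change[f_bad] = 1.0
print(change)   # [1.0, 1.0, 1.0, ~1.006] - no fake 50x "split" around index 1
```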
```diff
@@ -1365,6 +1400,29 @@ class TickerBase:
             logger.info(f'price-repair-split: No {fix_type}s detected')
             return df
 
+        # Update: if any 100x changes are soon after a stock split, so could be confused with split error, then abort
+        threshold_days = 30
+        f_splits = df['Stock Splits'].to_numpy() != 0.0
+        if change in [100.0, 0.01] and f_splits.any():
+            indices_A = np.where(f_splits)[0]
+            indices_B = np.where(f)[0]
+            if not len(indices_A) or not len(indices_B):
+                return None
+            gaps = indices_B[:, None] - indices_A
+            # Because data is sorted in DEscending order, need to flip gaps
+            gaps *= -1
+            f_pos = gaps > 0
+            if f_pos.any():
+                gap_min = gaps[f_pos].min()
+                gap_td = utils._interval_to_timedelta(interval) * gap_min
+                if isinstance(gap_td, _dateutil.relativedelta.relativedelta):
+                    threshold = _dateutil.relativedelta.relativedelta(days=threshold_days)
+                else:
+                    threshold = _datetime.timedelta(days=threshold_days)
+                if gap_td < threshold:
+                    logger.info(f'price-repair-split: 100x changes are too soon after stock split events, aborting')
+                    return df
+
         # if logger.isEnabledFor(logging.DEBUG):
         #     df_debug['i'] = list(range(0, df_debug.shape[0]))
         #     df_debug['i_rev'] = df_debug.shape[0]-1 - df_debug['i']
```
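The `indices_B[:, None] - indices_A` line computes every pairwise row-distance between detected 100x changes and split events in one numpy broadcast. A minimal illustration of that trick with made-up indices (ascending order here; the real code flips the sign because its rows are sorted descending):

```python
import numpy as np

idx_splits = np.array([3, 40])      # rows where a stock split occurred
idx_changes = np.array([5, 100])    # rows where a ~100x price change was found

# Broadcasting an (m,1) column against an (n,) row gives every pairwise gap:
gaps = idx_changes[:, None] - idx_splits
print(gaps)
# [[  2 -35]
#  [ 97  60]]

# Keep only changes occurring *after* a split, then take the closest pair;
# multiplying by the interval length converts rows to a calendar gap:
f_pos = gaps > 0
print(gaps[f_pos].min())   # -> 2 rows apart
```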
```diff
@@ -27,7 +27,7 @@ import datetime as _datetime
 import logging
 import os as _os
 import re as _re
-import sqlite3 as _sqlite3
+import peewee as _peewee
 import sys as _sys
 import threading
 from functools import lru_cache
```
```diff
@@ -169,14 +169,15 @@ def setup_debug_formatting():
         yf_logger.warning("logging mode not set to 'DEBUG', so not setting up debug formatting")
         return
 
-    if yf_logger.handlers is None or len(yf_logger.handlers) == 0:
-        h = logging.StreamHandler()
-        # Ensure different level strings don't interfere with indentation
-        formatter = MultiLineFormatter(fmt='%(levelname)-8s %(message)s')
-        h.setFormatter(formatter)
-        yf_logger.addHandler(h)
-
     global yf_log_indented
     if not yf_log_indented:
+        if yf_logger.handlers is None or len(yf_logger.handlers) == 0:
+            h = logging.StreamHandler()
+            # Ensure different level strings don't interfere with indentation
+            formatter = MultiLineFormatter(fmt='%(levelname)-8s %(message)s')
+            h.setFormatter(formatter)
+            yf_logger.addHandler(h)
 
         yf_log_indented = True
```
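This restructuring is what makes calling `enable_debug_mode` twice safe (#1687): the handler is only attached while the `yf_log_indented` flag is unset. The guard pattern in isolation, as a minimal sketch with a hypothetical logger name:

```python
import logging

_configured = False  # module-level flag, analogous to yf_log_indented

def enable_debug_logging():
    """Attach a StreamHandler to the logger, but only on the first call."""
    global _configured
    logger = logging.getLogger("demo")
    logger.setLevel(logging.DEBUG)
    if not _configured:
        if not logger.handlers:   # avoid stacking duplicate handlers
            h = logging.StreamHandler()
            h.setFormatter(logging.Formatter("%(levelname)-8s %(message)s"))
            logger.addHandler(h)
        _configured = True

enable_debug_logging()
enable_debug_logging()            # second call is a no-op, no duplicate output
logging.getLogger("demo").debug("printed once")
```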
```diff
@@ -628,30 +629,32 @@ def fix_Yahoo_returning_live_separate(quotes, interval, tz_exchange):
         # Yahoo is not returning live data (phew!)
         return quotes
     if _np.isnan(quotes.loc[idx2, "Open"]):
-        quotes.loc[idx2, "Open"] = quotes["Open"][n - 1]
+        quotes.loc[idx2, "Open"] = quotes["Open"].iloc[n - 1]
     # Note: nanmax() & nanmin() ignores NaNs, but still need to check not all are NaN to avoid warnings
-    if not _np.isnan(quotes["High"][n - 1]):
-        quotes.loc[idx2, "High"] = _np.nanmax([quotes["High"][n - 1], quotes["High"][n - 2]])
+    if not _np.isnan(quotes["High"].iloc[n - 1]):
+        quotes.loc[idx2, "High"] = _np.nanmax([quotes["High"].iloc[n - 1], quotes["High"].iloc[n - 2]])
         if "Adj High" in quotes.columns:
-            quotes.loc[idx2, "Adj High"] = _np.nanmax([quotes["Adj High"][n - 1], quotes["Adj High"][n - 2]])
+            quotes.loc[idx2, "Adj High"] = _np.nanmax([quotes["Adj High"].iloc[n - 1], quotes["Adj High"].iloc[n - 2]])
 
-    if not _np.isnan(quotes["Low"][n - 1]):
-        quotes.loc[idx2, "Low"] = _np.nanmin([quotes["Low"][n - 1], quotes["Low"][n - 2]])
+    if not _np.isnan(quotes["Low"].iloc[n - 1]):
+        quotes.loc[idx2, "Low"] = _np.nanmin([quotes["Low"].iloc[n - 1], quotes["Low"].iloc[n - 2]])
         if "Adj Low" in quotes.columns:
-            quotes.loc[idx2, "Adj Low"] = _np.nanmin([quotes["Adj Low"][n - 1], quotes["Adj Low"][n - 2]])
+            quotes.loc[idx2, "Adj Low"] = _np.nanmin([quotes["Adj Low"].iloc[n - 1], quotes["Adj Low"].iloc[n - 2]])
 
-    quotes.loc[idx2, "Close"] = quotes["Close"][n - 1]
+    quotes.loc[idx2, "Close"] = quotes["Close"].iloc[n - 1]
     if "Adj Close" in quotes.columns:
-        quotes.loc[idx2, "Adj Close"] = quotes["Adj Close"][n - 1]
-    quotes.loc[idx2, "Volume"] += quotes["Volume"][n - 1]
+        quotes.loc[idx2, "Adj Close"] = quotes["Adj Close"].iloc[n - 1]
+    quotes.loc[idx2, "Volume"] += quotes["Volume"].iloc[n - 1]
     quotes = quotes.drop(quotes.index[n - 1])
 
     return quotes
 
 
 def safe_merge_dfs(df_main, df_sub, interval):
-    if df_sub.shape[0] == 0:
+    if df_sub.empty:
         raise Exception("No data to merge")
+    if df_main.empty:
+        return df_main
 
     df_sub_backup = df_sub.copy()
     data_cols = [c for c in df_sub.columns if c not in df_main]
```
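The `quotes["Open"][n - 1]` to `quotes["Open"].iloc[n - 1]` changes address the pandas warning in #1672: plain `series[key]` is label-based first and only falls back to positional access for integer keys, a fallback recent pandas flags as deprecated. A standalone illustration with hypothetical data:

```python
import pandas as pd

s = pd.Series([10.0, 11.0, 12.0],
              index=pd.date_range("2023-09-13", periods=3))

# s[2] tries label lookup first, then falls back to position 2 on this
# non-integer index - recent pandas emits a FutureWarning for that fallback:
# value = s[2]            # FutureWarning, slated for removal

value = s.iloc[2]          # unambiguous positional access, no warning
print(value)               # 12.0
```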
```diff
@@ -675,47 +678,54 @@ def safe_merge_dfs(df_main, df_sub, interval):
     else:
         indices = _np.searchsorted(_np.append(df_main.index, df_main.index[-1] + td), df_sub.index, side='right')
         indices -= 1  # Convert from [[i-1], [i]) to [[i], [i+1])
-        # Numpy.searchsorted does not handle out-of-range well, so handle manually:
-        for i in range(len(df_sub.index)):
-            dt = df_sub.index[i]
-            if dt < df_main.index[0] or dt >= df_main.index[-1] + td:
-                # Out-of-range
-                indices[i] = -1
+    # Numpy.searchsorted does not handle out-of-range well, so handle manually:
+    for i in range(len(df_sub.index)):
+        dt = df_sub.index[i]
+        if dt < df_main.index[0] or dt >= df_main.index[-1] + td:
+            # Out-of-range
+            indices[i] = -1
 
     f_outOfRange = indices == -1
-    if f_outOfRange.any() and not intraday:
-        empty_row_data = {c:[_np.nan] for c in const.price_colnames}|{'Volume':[0]}
-        if interval == '1d':
-            # For 1d, add all out-of-range event dates
-            for i in _np.where(f_outOfRange)[0]:
-                dt = df_sub.index[i]
-                get_yf_logger().debug(f"Adding out-of-range {data_col} @ {dt.date()} in new prices row of NaNs")
-                empty_row = _pd.DataFrame(data=empty_row_data, index=[dt])
-                df_main = _pd.concat([df_main, empty_row], sort=True)
-        else:
-            # Else, only add out-of-range event dates if occurring in interval
-            # immediately after last pricfe row
-            last_dt = df_main.index[-1]
-            next_interval_start_dt = last_dt + td
-            next_interval_end_dt = next_interval_start_dt + td
-            for i in _np.where(f_outOfRange)[0]:
-                dt = df_sub.index[i]
-                if next_interval_start_dt <= dt < next_interval_end_dt:
-                    new_dt = next_interval_start_dt
-                    get_yf_logger().debug(f"Adding out-of-range {data_col} @ {dt.date()} in new prices row of NaNs")
-                    empty_row = _pd.DataFrame(data=empty_row_data, index=[dt])
-                    df_main = _pd.concat([df_main, empty_row], sort=True)
+    if f_outOfRange.any():
+        if intraday:
+            # Discard out-of-range dividends in intraday data, assume user not interested
+            df_sub = df_sub[~f_outOfRange]
+            if df_sub.empty:
+                df_main['Dividends'] = 0.0
+                return df_main
+        else:
+            empty_row_data = {c:[_np.nan] for c in const.price_colnames}|{'Volume':[0]}
+            if interval == '1d':
+                # For 1d, add all out-of-range event dates
+                for i in _np.where(f_outOfRange)[0]:
+                    dt = df_sub.index[i]
+                    get_yf_logger().debug(f"Adding out-of-range {data_col} @ {dt.date()} in new prices row of NaNs")
+                    empty_row = _pd.DataFrame(data=empty_row_data, index=[dt])
+                    df_main = _pd.concat([df_main, empty_row], sort=True)
+                df_main = df_main.sort_index()
+            else:
+                # Else, only add out-of-range event dates if occurring in interval
+                # immediately after last price row
+                last_dt = df_main.index[-1]
+                next_interval_start_dt = last_dt + td
+                next_interval_end_dt = next_interval_start_dt + td
+                for i in _np.where(f_outOfRange)[0]:
+                    dt = df_sub.index[i]
+                    if next_interval_start_dt <= dt < next_interval_end_dt:
+                        new_dt = next_interval_start_dt
+                        get_yf_logger().debug(f"Adding out-of-range {data_col} @ {dt.date()} in new prices row of NaNs")
+                        empty_row = _pd.DataFrame(data=empty_row_data, index=[dt])
+                        df_main = _pd.concat([df_main, empty_row], sort=True)
+                        df_main = df_main.sort_index()
 
-        # Re-calculate indices
-        indices = _np.searchsorted(_np.append(df_main.index, df_main.index[-1] + td), df_sub.index, side='right')
-        indices -= 1  # Convert from [[i-1], [i]) to [[i], [i+1])
-        # Numpy.searchsorted does not handle out-of-range well, so handle manually:
-        for i in range(len(df_sub.index)):
-            dt = df_sub.index[i]
-            if dt < df_main.index[0] or dt >= df_main.index[-1] + td:
-                # Out-of-range
-                indices[i] = -1
+    # Re-calculate indices
+    indices = _np.searchsorted(_np.append(df_main.index, df_main.index[-1] + td), df_sub.index, side='right')
+    indices -= 1  # Convert from [[i-1], [i]) to [[i], [i+1])
+    # Numpy.searchsorted does not handle out-of-range well, so handle manually:
+    for i in range(len(df_sub.index)):
+        dt = df_sub.index[i]
+        if dt < df_main.index[0] or dt >= df_main.index[-1] + td:
+            # Out-of-range
+            indices[i] = -1
 
     f_outOfRange = indices == -1
     if f_outOfRange.any():
```
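The heart of `safe_merge_dfs` is mapping each event timestamp to the price row (interval bin) that contains it. A minimal sketch of that `searchsorted` trick with a sentinel end-edge, using hypothetical 30m bins and event times (not yfinance's code):

```python
import numpy as np
import pandas as pd

# Price rows = 30m bins starting at each timestamp
bins = pd.to_datetime(["2023-09-13 10:00", "2023-09-13 10:30", "2023-09-13 11:00"])
td = pd.Timedelta("30m")
events = pd.to_datetime(["2023-09-13 10:45", "2023-09-13 12:15"])

# Append one sentinel so the last bin has a right edge, then bisect:
edges = bins.append(pd.DatetimeIndex([bins[-1] + td]))
indices = edges.searchsorted(events, side="right") - 1

# searchsorted does not flag out-of-range values, so do it manually:
for i, dt in enumerate(events):
    if dt < bins[0] or dt >= bins[-1] + td:
        indices[i] = -1

print(indices)  # [1, -1] -> 10:45 lands in the 10:30 bin, 12:15 is out-of-range
```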
```diff
@@ -891,136 +901,60 @@ class ProgressBar:
 # ---------------------------------
 # TimeZone cache related code
 # ---------------------------------
 
-class _KVStore:
-    """Simple Sqlite backed key/value store, key and value are strings. Should be thread safe."""
-
-    def __init__(self, filename):
-        self._cache_mutex = Lock()
-        with self._cache_mutex:
-            self.conn = _sqlite3.connect(filename, timeout=10, check_same_thread=False)
-            self.conn.execute('pragma journal_mode=wal')
-            try:
-                self.conn.execute('create table if not exists "kv" (key TEXT primary key, value TEXT) without rowid')
-            except Exception as e:
-                if 'near "without": syntax error' in str(e):
-                    # "without rowid" requires sqlite 3.8.2. Older versions will raise exception
-                    self.conn.execute('create table if not exists "kv" (key TEXT primary key, value TEXT)')
-                else:
-                    raise
-            self.conn.commit()
-        _atexit.register(self.close)
-
-    def close(self):
-        if self.conn is not None:
-            with self._cache_mutex:
-                self.conn.close()
-                self.conn = None
-
-    def get(self, key: str) -> Union[str, None]:
-        """Get value for key if it exists else returns None"""
-        try:
-            item = self.conn.execute('select value from "kv" where key=?', (key,))
-        except _sqlite3.IntegrityError as e:
-            self.delete(key)
-            return None
-        if item:
-            return next(item, (None,))[0]
-
-    def set(self, key: str, value: str) -> None:
-        if value is None:
-            self.delete(key)
-        else:
-            with self._cache_mutex:
-                self.conn.execute('replace into "kv" (key, value) values (?,?)', (key, value))
-                self.conn.commit()
-
-    def bulk_set(self, kvdata: Dict[str, str]):
-        records = tuple(i for i in kvdata.items())
-        with self._cache_mutex:
-            self.conn.executemany('replace into "kv" (key, value) values (?,?)', records)
-            self.conn.commit()
-
-    def delete(self, key: str):
-        with self._cache_mutex:
-            self.conn.execute('delete from "kv" where key=?', (key,))
-            self.conn.commit()
+_cache_dir = _os.path.join(_ad.user_cache_dir(), "py-yfinance")
+DB_PATH = _os.path.join(_cache_dir, 'tkr-tz.db')
+db = _peewee.SqliteDatabase(DB_PATH, pragmas={'journal_mode': 'wal', 'cache_size': -64})
+_tz_cache = None
 
 
 class _TzCacheException(Exception):
     pass
 
 
+class KV(_peewee.Model):
+    key = _peewee.CharField(primary_key=True)
+    value = _peewee.CharField(null=True)
+
+    class Meta:
+        database = db
+        without_rowid = True
+
+
 class _TzCache:
     """Simple sqlite file cache of ticker->timezone"""
 
     def __init__(self):
         self._setup_cache_folder()
-        try:
-            self._tz_db = _KVStore(_os.path.join(self._db_dir, "tkr-tz.db"))
-        except _sqlite3.DatabaseError as err:
-            raise _TzCacheException(f"Error creating TzCache folder: '{self._db_dir}' reason: {err}")
-        self._migrate_cache_tkr_tz()
+        # Must init db here, where is thread-safe
+        db.connect()
+        db.create_tables([KV])
+
+        old_cache_file_path = _os.path.join(_cache_dir, "tkr-tz.csv")
+        if _os.path.isfile(old_cache_file_path):
+            _os.remove(old_cache_file_path)
 
     def _setup_cache_folder(self):
-        if not _os.path.isdir(self._db_dir):
+        if not _os.path.isdir(_cache_dir):
             try:
-                _os.makedirs(self._db_dir)
+                _os.makedirs(_cache_dir)
             except OSError as err:
-                raise _TzCacheException(f"Error creating TzCache folder: '{self._db_dir}' reason: {err}")
+                raise _TzCacheException(f"Error creating TzCache folder: '{_cache_dir}' reason: {err}")
 
-        elif not (_os.access(self._db_dir, _os.R_OK) and _os.access(self._db_dir, _os.W_OK)):
-            raise _TzCacheException(f"Cannot read and write in TzCache folder: '{self._db_dir}'")
+        elif not (_os.access(_cache_dir, _os.R_OK) and _os.access(_cache_dir, _os.W_OK)):
+            raise _TzCacheException(f"Cannot read and write in TzCache folder: '{_cache_dir}'")
 
-    def lookup(self, tkr):
-        return self.tz_db.get(tkr)
+    def lookup(self, key):
+        try:
+            return KV.get(KV.key == key).value
+        except KV.DoesNotExist:
+            return None
 
-    def store(self, tkr, tz):
-        if tz is None:
-            self.tz_db.delete(tkr)
-        else:
-            tz_db = self.tz_db.get(tkr)
-            if tz_db is not None:
-                if tz != tz_db:
-                    get_yf_logger().debug(f'{tkr}: Overwriting cached TZ "{tz_db}" with different TZ "{tz}"')
-                    self.tz_db.set(tkr, tz)
-            else:
-                self.tz_db.set(tkr, tz)
-
-    @property
-    def _db_dir(self):
-        global _cache_dir
-        return _os.path.join(_cache_dir, "py-yfinance")
-
-    @property
-    def tz_db(self):
-        return self._tz_db
-
-    def _migrate_cache_tkr_tz(self):
-        """Migrate contents from old ticker CSV-cache to SQLite db"""
-        old_cache_file_path = _os.path.join(self._db_dir, "tkr-tz.csv")
-        if not _os.path.isfile(old_cache_file_path):
-            return
-        try:
-            df = _pd.read_csv(old_cache_file_path, index_col="Ticker", on_bad_lines="skip")
-        except _pd.errors.EmptyDataError:
-            _os.remove(old_cache_file_path)
-        except TypeError:
-            _os.remove(old_cache_file_path)
-        else:
-            # Discard corrupt data:
-            df = df[~df["Tz"].isna().to_numpy()]
-            df = df[~(df["Tz"] == '').to_numpy()]
-            df = df[~df.index.isna()]
-            if not df.empty:
-                try:
-                    self.tz_db.bulk_set(df.to_dict()['Tz'])
-                except Exception as e:
-                    # Ignore
-                    pass
-            _os.remove(old_cache_file_path)
+    def store(self, key, value):
+        try:
+            if value is None:
+                q = KV.delete().where(KV.key == key)
+                q.execute()
+                return
+            with db.atomic():
+                KV.insert(key=key, value=value).execute()
+        except _peewee.IntegrityError:
+            # Integrity error means the key already exists. Try updating the key.
+            old_value = self.lookup(key)
+            if old_value != value:
+                get_yf_logger().debug(f"Value for key {key} changed from {old_value} to {value}.")
+                with db.atomic():
+                    q = KV.update(value=value).where(KV.key == key)
+                    q.execute()
+
+    def close(self):
+        db.close()
 
 
 class _TzCacheDummy:
```
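Why this helps thread-safety (#1675): the old code shared one raw `sqlite3` connection guarded by a manual `Lock`, whereas peewee's `SqliteDatabase` manages per-thread connection state and `db.atomic()` wraps each write in a transaction. A minimal standalone sketch of the same insert-then-update pattern (in-memory database, hypothetical key; not the library's exact code):

```python
import peewee

db = peewee.SqliteDatabase(":memory:")  # the real cache points at tkr-tz.db

class KV(peewee.Model):
    key = peewee.CharField(primary_key=True)
    value = peewee.CharField(null=True)

    class Meta:
        database = db

db.connect()
db.create_tables([KV])

def store(key, value):
    try:
        with db.atomic():                  # transaction per write
            KV.insert(key=key, value=value).execute()
    except peewee.IntegrityError:          # key already exists -> update instead
        with db.atomic():
            KV.update(value=value).where(KV.key == key).execute()

store("MSFT", "America/New_York")
store("MSFT", "America/New_York")          # second call takes the update path
print(KV.get(KV.key == "MSFT").value)      # America/New_York
```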
```diff
@@ -1058,9 +992,7 @@ def get_tz_cache():
         return _tz_cache
 
 
-_cache_dir = _ad.user_cache_dir()
 _cache_init_lock = Lock()
-_tz_cache = None
 
 
 def set_tz_cache_location(cache_dir: str):
```
```diff
@@ -1 +1 @@
-version = "0.2.28"
+version = "0.2.29"
```