Compare commits
65 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fb5c67b3bd | ||
|
|
3f33aa0377 | ||
|
|
ecdc36ab8e | ||
|
|
fbc5de153a | ||
|
|
e4a228b830 | ||
|
|
3cee66dea7 | ||
|
|
bec5b38189 | ||
|
|
f5973b2c89 | ||
|
|
edb911b913 | ||
|
|
6117b0a042 | ||
|
|
5cb5484a9a | ||
|
|
4e33ddf615 | ||
|
|
6d87f3d689 | ||
|
|
b30b97fa36 | ||
|
|
6253e1d8a0 | ||
|
|
2dce6a705c | ||
|
|
df11fcdb37 | ||
|
|
567e2cf0d3 | ||
|
|
3d6e88857b | ||
|
|
59af19d84c | ||
|
|
e07191b627 | ||
|
|
2623ba967d | ||
|
|
fe1c705e24 | ||
|
|
9315f7b61d | ||
|
|
f76c788881 | ||
|
|
561f56c9f9 | ||
|
|
cf795ea0c7 | ||
|
|
643536b53b | ||
|
|
ae8a5ff996 | ||
|
|
d01d378c8d | ||
|
|
9e0152aae4 | ||
|
|
6c21c1994e | ||
|
|
d24a25f579 | ||
|
|
422a50672d | ||
|
|
6e09410c7d | ||
|
|
3c51687351 | ||
|
|
783df54978 | ||
|
|
c76bf0128f | ||
|
|
33f57ac002 | ||
|
|
c0e1536179 | ||
|
|
303e0ea655 | ||
|
|
40424b71a6 | ||
|
|
b018f917a9 | ||
|
|
28e50946ca | ||
|
|
841b485b1d | ||
|
|
e842a9d657 | ||
|
|
0f14728591 | ||
|
|
69dfe325ae | ||
|
|
f20aa9a875 | ||
|
|
5707c1aa65 | ||
|
|
1e7f4a9a91 | ||
|
|
37c36549e4 | ||
|
|
bda339b170 | ||
|
|
f5995161ed | ||
|
|
4734e92090 | ||
|
|
5fdf2463e9 | ||
|
|
c679551faa | ||
|
|
fdf52ac360 | ||
|
|
94ad0bd955 | ||
|
|
51c0ea0050 | ||
|
|
3401d4dbe7 | ||
|
|
a724585552 | ||
|
|
1c85433cc0 | ||
|
|
5c0b2bbaa3 | ||
|
|
7d45a6709a |
@@ -1,6 +1,16 @@
|
||||
Change Log
|
||||
===========
|
||||
|
||||
0.2.0rc1
|
||||
------
|
||||
Jumping to 0.2 for this big update. 0.1.* will continue to receive bug-fixes
|
||||
- timezone cache performance massively improved. Thanks @fredrik-corneliusson #1113 #1112 #1109 #1105 #1099
|
||||
- price repair feature #1110
|
||||
- fix merging of dividends/splits with prices #1069 #1086 #1102
|
||||
- fix Yahoo returning latest price interval across 2 rows #1070
|
||||
- optional: raise errors as exceptions: raise_errors=True #1104
|
||||
- add proper unit tests #1069
|
||||
|
||||
0.1.81
|
||||
------
|
||||
- Fix unhandled tz-cache exception #1107
|
||||
|
||||
@@ -198,6 +198,9 @@ data = yf.download( # or pdr.get_data_yahoo(...
|
||||
# (optional, default is False)
|
||||
auto_adjust = True,
|
||||
|
||||
# identify and attempt repair of currency unit mixups e.g. $/cents
|
||||
repair = False,
|
||||
|
||||
# download pre/post regular market hours data
|
||||
# (optional, default is False)
|
||||
prepost = True,
|
||||
|
||||
@@ -4,3 +4,4 @@ requests>=2.26
|
||||
multitasking>=0.0.7
|
||||
lxml>=4.5.1
|
||||
appdirs>=1.4.4
|
||||
pytz>=2022.5
|
||||
2
setup.py
2
setup.py
@@ -63,7 +63,7 @@ setup(
|
||||
packages=find_packages(exclude=['contrib', 'docs', 'tests', 'examples']),
|
||||
install_requires=['pandas>=0.24.0', 'numpy>=1.15',
|
||||
'requests>=2.26', 'multitasking>=0.0.7',
|
||||
'lxml>=4.5.1', 'appdirs>=1.4.4'],
|
||||
'lxml>=4.5.1', 'appdirs>=1.4.4', 'pytz>=2022.5'],
|
||||
entry_points={
|
||||
'console_scripts': [
|
||||
'sample=sample:main',
|
||||
|
||||
1
tests/__init__.py
Normal file
1
tests/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
#!/usr/bin/env python
|
||||
9
tests/context.py
Normal file
9
tests/context.py
Normal file
@@ -0,0 +1,9 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import sys
|
||||
import os
|
||||
_parent_dp = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
|
||||
_src_dp = _parent_dp
|
||||
sys.path.insert(0, _src_dp)
|
||||
|
||||
import yfinance
|
||||
482
tests/prices.py
Normal file
482
tests/prices.py
Normal file
@@ -0,0 +1,482 @@
|
||||
from .context import yfinance as yf
|
||||
|
||||
import unittest
|
||||
|
||||
import datetime as _dt
|
||||
import pytz as _tz
|
||||
import numpy as _np
|
||||
import pandas as _pd
|
||||
|
||||
# Create temp session
|
||||
import requests_cache, tempfile
|
||||
td = tempfile.TemporaryDirectory()
|
||||
|
||||
class TestPriceHistory(unittest.TestCase):
|
||||
def setUp(self):
|
||||
global td ; self.td = td
|
||||
self.session = requests_cache.CachedSession(self.td.name+'/'+"yfinance.cache")
|
||||
|
||||
def tearDown(self):
|
||||
self.session.close()
|
||||
|
||||
|
||||
def test_daily_index(self):
|
||||
tkrs = []
|
||||
tkrs.append("BHP.AX")
|
||||
tkrs.append("IMP.JO")
|
||||
tkrs.append("BP.L")
|
||||
tkrs.append("PNL.L")
|
||||
tkrs.append("INTC")
|
||||
|
||||
intervals=["1d","1wk","1mo"]
|
||||
|
||||
for tkr in tkrs:
|
||||
dat = yf.Ticker(tkr, session=self.session)
|
||||
|
||||
for interval in intervals:
|
||||
df = dat.history(period="5y", interval=interval)
|
||||
|
||||
f = df.index.time==_dt.time(0)
|
||||
self.assertTrue(f.all())
|
||||
|
||||
|
||||
def test_duplicatingDaily(self):
|
||||
tkrs = []
|
||||
tkrs.append("IMP.JO")
|
||||
tkrs.append("BHG.JO")
|
||||
tkrs.append("SSW.JO")
|
||||
tkrs.append("BP.L")
|
||||
tkrs.append("INTC")
|
||||
test_run = False
|
||||
for tkr in tkrs:
|
||||
dat = yf.Ticker(tkr, session=self.session)
|
||||
tz = dat._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
|
||||
|
||||
dt_utc = _tz.timezone("UTC").localize(_dt.datetime.utcnow())
|
||||
dt = dt_utc.astimezone(_tz.timezone(tz))
|
||||
if dt.time() < _dt.time(17,0):
|
||||
continue
|
||||
test_run = True
|
||||
|
||||
df = dat.history(start=dt.date()-_dt.timedelta(days=7), interval="1d")
|
||||
|
||||
dt0 = df.index[-2]
|
||||
dt1 = df.index[-1]
|
||||
try:
|
||||
self.assertNotEqual(dt0, dt1)
|
||||
except:
|
||||
print("Ticker = ", tkr)
|
||||
raise
|
||||
|
||||
if not test_run:
|
||||
self.skipTest("Skipping test_duplicatingDaily() because only expected to fail just after market close")
|
||||
|
||||
|
||||
def test_duplicatingWeekly(self):
|
||||
tkrs = ['MSFT', 'IWO', 'VFINX', '^GSPC', 'BTC-USD']
|
||||
test_run = False
|
||||
for tkr in tkrs:
|
||||
dat = yf.Ticker(tkr, session=self.session)
|
||||
tz = dat._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
|
||||
|
||||
dt = _tz.timezone(tz).localize(_dt.datetime.now())
|
||||
if dt.date().weekday() not in [1,2,3,4]:
|
||||
continue
|
||||
test_run = True
|
||||
|
||||
df = dat.history(start=dt.date()-_dt.timedelta(days=7), interval="1wk")
|
||||
dt0 = df.index[-2]
|
||||
dt1 = df.index[-1]
|
||||
try:
|
||||
self.assertNotEqual(dt0.week, dt1.week)
|
||||
except:
|
||||
print("Ticker={}: Last two rows within same week:".format(tkr))
|
||||
print(df.iloc[df.shape[0]-2:])
|
||||
raise
|
||||
|
||||
if not test_run:
|
||||
self.skipTest("Skipping test_duplicatingWeekly() because not possible to fail Monday/weekend")
|
||||
|
||||
|
||||
def test_intraDayWithEvents(self):
|
||||
# TASE dividend release pre-market, doesn't merge nicely with intra-day data so check still present
|
||||
|
||||
tkr = "ICL.TA"
|
||||
# tkr = "ESLT.TA"
|
||||
# tkr = "ONE.TA"
|
||||
# tkr = "MGDL.TA"
|
||||
start_d = _dt.date.today() - _dt.timedelta(days=60)
|
||||
end_d = None
|
||||
df_daily = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1d", actions=True)
|
||||
df_daily_divs = df_daily["Dividends"][df_daily["Dividends"]!=0]
|
||||
if df_daily_divs.shape[0]==0:
|
||||
self.skipTest("Skipping test_intraDayWithEvents() because 'ICL.TA' has no dividend in last 60 days")
|
||||
|
||||
last_div_date = df_daily_divs.index[-1]
|
||||
start_d = last_div_date.date()
|
||||
end_d = last_div_date.date() + _dt.timedelta(days=1)
|
||||
df = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="15m", actions=True)
|
||||
self.assertTrue((df["Dividends"]!=0.0).any())
|
||||
|
||||
|
||||
def test_dailyWithEvents(self):
|
||||
# Reproduce issue #521
|
||||
tkr1 = "QQQ"
|
||||
tkr2 = "GDX"
|
||||
start_d = "2014-12-29"
|
||||
end_d = "2020-11-29"
|
||||
df1 = yf.Ticker(tkr1).history(start=start_d, end=end_d, interval="1d", actions=True)
|
||||
df2 = yf.Ticker(tkr2).history(start=start_d, end=end_d, interval="1d", actions=True)
|
||||
try:
|
||||
self.assertTrue(df1.index.equals(df2.index))
|
||||
except:
|
||||
missing_from_df1 = df2.index.difference(df1.index)
|
||||
missing_from_df2 = df1.index.difference(df2.index)
|
||||
print("{} missing these dates: {}".format(tkr1, missing_from_df1))
|
||||
print("{} missing these dates: {}".format(tkr2, missing_from_df2))
|
||||
raise
|
||||
|
||||
# Test that index same with and without events:
|
||||
tkrs = [tkr1, tkr2]
|
||||
for tkr in tkrs:
|
||||
df1 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1d", actions=True)
|
||||
df2 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1d", actions=False)
|
||||
try:
|
||||
self.assertTrue(df1.index.equals(df2.index))
|
||||
except:
|
||||
missing_from_df1 = df2.index.difference(df1.index)
|
||||
missing_from_df2 = df1.index.difference(df2.index)
|
||||
print("{}-with-events missing these dates: {}".format(tkr, missing_from_df1))
|
||||
print("{}-without-events missing these dates: {}".format(tkr, missing_from_df2))
|
||||
raise
|
||||
|
||||
|
||||
def test_weeklyWithEvents(self):
|
||||
# Reproduce issue #521
|
||||
tkr1 = "QQQ"
|
||||
tkr2 = "GDX"
|
||||
start_d = "2014-12-29"
|
||||
end_d = "2020-11-29"
|
||||
df1 = yf.Ticker(tkr1).history(start=start_d, end=end_d, interval="1wk", actions=True)
|
||||
df2 = yf.Ticker(tkr2).history(start=start_d, end=end_d, interval="1wk", actions=True)
|
||||
try:
|
||||
self.assertTrue(df1.index.equals(df2.index))
|
||||
except:
|
||||
missing_from_df1 = df2.index.difference(df1.index)
|
||||
missing_from_df2 = df1.index.difference(df2.index)
|
||||
print("{} missing these dates: {}".format(tkr1, missing_from_df1))
|
||||
print("{} missing these dates: {}".format(tkr2, missing_from_df2))
|
||||
raise
|
||||
|
||||
# Test that index same with and without events:
|
||||
tkrs = [tkr1, tkr2]
|
||||
for tkr in tkrs:
|
||||
df1 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1wk", actions=True)
|
||||
df2 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1wk", actions=False)
|
||||
try:
|
||||
self.assertTrue(df1.index.equals(df2.index))
|
||||
except:
|
||||
missing_from_df1 = df2.index.difference(df1.index)
|
||||
missing_from_df2 = df1.index.difference(df2.index)
|
||||
print("{}-with-events missing these dates: {}".format(tkr, missing_from_df1))
|
||||
print("{}-without-events missing these dates: {}".format(tkr, missing_from_df2))
|
||||
raise
|
||||
|
||||
|
||||
def test_monthlyWithEvents(self):
|
||||
tkr1 = "QQQ"
|
||||
tkr2 = "GDX"
|
||||
start_d = "2014-12-29"
|
||||
end_d = "2020-11-29"
|
||||
df1 = yf.Ticker(tkr1).history(start=start_d, end=end_d, interval="1mo", actions=True)
|
||||
df2 = yf.Ticker(tkr2).history(start=start_d, end=end_d, interval="1mo", actions=True)
|
||||
try:
|
||||
self.assertTrue(df1.index.equals(df2.index))
|
||||
except:
|
||||
missing_from_df1 = df2.index.difference(df1.index)
|
||||
missing_from_df2 = df1.index.difference(df2.index)
|
||||
print("{} missing these dates: {}".format(tkr1, missing_from_df1))
|
||||
print("{} missing these dates: {}".format(tkr2, missing_from_df2))
|
||||
raise
|
||||
|
||||
# Test that index same with and without events:
|
||||
tkrs = [tkr1, tkr2]
|
||||
for tkr in tkrs:
|
||||
df1 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1mo", actions=True)
|
||||
df2 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1mo", actions=False)
|
||||
try:
|
||||
self.assertTrue(df1.index.equals(df2.index))
|
||||
except:
|
||||
missing_from_df1 = df2.index.difference(df1.index)
|
||||
missing_from_df2 = df1.index.difference(df2.index)
|
||||
print("{}-with-events missing these dates: {}".format(tkr, missing_from_df1))
|
||||
print("{}-without-events missing these dates: {}".format(tkr, missing_from_df2))
|
||||
raise
|
||||
|
||||
|
||||
def test_tz_dst_ambiguous(self):
|
||||
# Reproduce issue #1100
|
||||
|
||||
try:
|
||||
yf.Ticker("ESLT.TA", session=self.session).history(start="2002-10-06", end="2002-10-09", interval="1d")
|
||||
except _tz.exceptions.AmbiguousTimeError:
|
||||
raise Exception("Ambiguous DST issue not resolved")
|
||||
|
||||
|
||||
def test_repair_weekly(self):
|
||||
# Sometimes, Yahoo returns prices 100x the correct value.
|
||||
# Suspect mixup between £/pence or $/cents etc.
|
||||
# E.g. ticker PNL.L
|
||||
|
||||
# Setup:
|
||||
tkr = "PNL.L"
|
||||
error_threshold = 1000.0
|
||||
start = "2020-01-06"
|
||||
end = min(_dt.date.today(), _dt.date(2023,1,1))
|
||||
|
||||
# Run test
|
||||
|
||||
dat = yf.Ticker(tkr, session=self.session)
|
||||
df_bad = dat.history(start=start, end=end, interval="1wk", auto_adjust=False, repair=False)
|
||||
|
||||
# Record the errors that will be repaired
|
||||
data_cols = ["Low","High","Open","Close","Adj Close"]
|
||||
f_outlier = _np.where(df_bad[data_cols]>error_threshold)
|
||||
indices = None
|
||||
if len(f_outlier[0])==0:
|
||||
self.skipTest("Skipping test_repair_weekly() because no price 100x errors to repair")
|
||||
indices = []
|
||||
for i in range(len(f_outlier[0])):
|
||||
indices.append((f_outlier[0][i], f_outlier[1][i]))
|
||||
|
||||
df = dat.history(start=start, end=end, interval="1wk", auto_adjust=False, repair=True)
|
||||
|
||||
# First test - no errors left after repair
|
||||
df_data = df[data_cols].values
|
||||
for i,j in indices:
|
||||
try:
|
||||
self.assertTrue(df_data[i,j] < error_threshold)
|
||||
except:
|
||||
print("Detected uncorrected error: idx={}, {}={}".format(df.index[i], data_cols[j], df_data[i,j]))
|
||||
raise
|
||||
|
||||
# Second test - all differences between pre- and post-repair should be ~100x
|
||||
ratio = (df_bad[data_cols].values/df[data_cols].values).round(2)
|
||||
# - round near-100 ratios to 100:
|
||||
f_near_100 = (ratio>90)&(ratio<110)
|
||||
ratio[f_near_100] = (ratio[f_near_100]/10).round().astype(int)*10 # round ratio to nearest 10
|
||||
# - now test
|
||||
f_100 = ratio==100
|
||||
f_1 = ratio==1
|
||||
self.assertTrue((f_100|f_1).all())
|
||||
|
||||
# Third test: compare directly against daily data, unadjusted
|
||||
df = dat.history(start=start, end=end, interval="1wk", auto_adjust=False, repair=True)
|
||||
for i in indices:
|
||||
dt = df.index[i[0]]
|
||||
|
||||
df_daily = dat.history(start=dt, end=dt+_dt.timedelta(days=7), interval="1d", auto_adjust=False, repair=True)
|
||||
|
||||
# Manually construct weekly price data from daily
|
||||
df_yf_weekly = df_daily.copy()
|
||||
df_yf_weekly["_weekStart"] = _pd.to_datetime(df_yf_weekly.index.tz_localize(None).to_period('W-SUN').start_time).tz_localize(df.index.tz)
|
||||
df_yf_weekly.loc[df_yf_weekly["Stock Splits"]==0,"Stock Splits"]=1
|
||||
df_yf_weekly = df_yf_weekly.groupby("_weekStart").agg(
|
||||
Open=("Open", "first"),
|
||||
Close=("Close", "last"),
|
||||
AdjClose=("Adj Close", "last"),
|
||||
Low=("Low", "min"),
|
||||
High=("High", "max"),
|
||||
Volume=("Volume", "sum"),
|
||||
Dividends=("Dividends", "sum"),
|
||||
StockSplits=("Stock Splits", "prod")).rename(columns={"StockSplits":"Stock Splits","AdjClose":"Adj Close"})
|
||||
df_yf_weekly.loc[df_yf_weekly["Stock Splits"]==1,"Stock Splits"]=0
|
||||
if df_yf_weekly.index[0] not in df_daily.index:
|
||||
# Exchange closed Monday. In this case, Yahoo sets Open to last week close
|
||||
df_daily_last_week = dat.history(start=dt-_dt.timedelta(days=7), end=dt, interval="1d", auto_adjust=False, repair=True)
|
||||
df_yf_weekly["Open"] = df_daily_last_week["Close"][-1]
|
||||
df_yf_weekly["Low"] = _np.minimum(df_yf_weekly["Low"], df_yf_weekly["Open"])
|
||||
|
||||
# Compare fetched-weekly vs constructed-weekly:
|
||||
df_yf_weekly = df_yf_weekly[df.columns]
|
||||
try:
|
||||
# Note: Adj Close has tiny variance depending on date range requested
|
||||
data_cols = ["Open","Close","Low","High"]
|
||||
self.assertTrue(_np.equal(df.loc[dt,data_cols].values, df_yf_weekly[data_cols].iloc[0].values).all())
|
||||
self.assertLess(abs(df.loc[dt,"Adj Close"]/df_yf_weekly["Adj Close"].iloc[0] -1.0), 0.000001)
|
||||
except:
|
||||
for c in df.columns:
|
||||
if c=="Adj Close":
|
||||
fail = abs(df.loc[dt,c]/df_yf_weekly[c].iloc[0] -1.0) < 0.000001
|
||||
else:
|
||||
fail = df.loc[dt,c] != df_yf_weekly[c].iloc[0]
|
||||
if fail:
|
||||
print("dt = ",dt)
|
||||
print("df.loc[dt]:", type(df.loc[dt]))
|
||||
print(df.loc[dt].to_dict())
|
||||
print("df_yf_weekly.iloc[0]:", type(df_yf_weekly.iloc[0]))
|
||||
print(df_yf_weekly.iloc[0].to_dict())
|
||||
print("Result:", df.loc[dt,c])
|
||||
print("Answer:", df_yf_weekly[c].iloc[0])
|
||||
raise Exception("Mismatch in column '{}'".format(c))
|
||||
|
||||
|
||||
def test_repair_weekly2_preSplit(self):
|
||||
# Sometimes, Yahoo returns prices 100x the correct value.
|
||||
# Suspect mixup between £/pence or $/cents etc.
|
||||
# E.g. ticker PNL.L
|
||||
|
||||
# PNL.L has a stock-split in 2022. Sometimes requesting data before 2022 is not split-adjusted.
|
||||
|
||||
# Setup:
|
||||
tkr = "PNL.L"
|
||||
error_threshold = 1000.0
|
||||
start = "2020-01-06"
|
||||
end = "2021-06-01"
|
||||
|
||||
# Run test
|
||||
|
||||
dat = yf.Ticker(tkr, session=self.session)
|
||||
df_bad = dat.history(start=start, end=end, interval="1wk", auto_adjust=False, repair=False)
|
||||
|
||||
# Record the errors that will be repaired
|
||||
data_cols = ["Low","High","Open","Close","Adj Close"]
|
||||
f_outlier = _np.where(df_bad[data_cols]>error_threshold)
|
||||
indices = None
|
||||
if len(f_outlier[0])==0:
|
||||
self.skipTest("Skipping test_repair_weekly() because no price 100x errors to repair")
|
||||
indices = []
|
||||
for i in range(len(f_outlier[0])):
|
||||
indices.append((f_outlier[0][i], f_outlier[1][i]))
|
||||
|
||||
df = dat.history(start=start, end=end, interval="1wk", auto_adjust=False, repair=True)
|
||||
|
||||
# First test - no errors left after repair
|
||||
df_data = df[data_cols].values
|
||||
for i,j in indices:
|
||||
try:
|
||||
self.assertTrue(df_data[i,j] < error_threshold)
|
||||
except:
|
||||
print("Detected uncorrected error: idx={}, {}={}".format(df.index[i], data_cols[j], df_data[i,j]))
|
||||
raise
|
||||
|
||||
# Second test - all differences between pre- and post-repair should be ~100x
|
||||
ratio = (df_bad[data_cols].values/df[data_cols].values).round(2)
|
||||
# - round near-100 ratios to 100:
|
||||
f_near_100 = (ratio>90)&(ratio<110)
|
||||
ratio[f_near_100] = (ratio[f_near_100]/10).round().astype(int)*10 # round ratio to nearest 10
|
||||
# - now test
|
||||
f_100 = ratio==100
|
||||
f_1 = ratio==1
|
||||
self.assertTrue((f_100|f_1).all())
|
||||
|
||||
# Third test: compare directly against daily data, unadjusted
|
||||
df = dat.history(start=start, end=end, interval="1wk", auto_adjust=False, repair=True)
|
||||
for i in indices:
|
||||
dt = df.index[i[0]]
|
||||
|
||||
df_daily = dat.history(start=dt, end=dt+_dt.timedelta(days=7), interval="1d", auto_adjust=False, repair=True)
|
||||
|
||||
# Manually construct weekly price data from daily
|
||||
df_yf_weekly = df_daily.copy()
|
||||
df_yf_weekly["_weekStart"] = _pd.to_datetime(df_yf_weekly.index.tz_localize(None).to_period('W-SUN').start_time).tz_localize(df.index.tz)
|
||||
df_yf_weekly.loc[df_yf_weekly["Stock Splits"]==0,"Stock Splits"]=1
|
||||
df_yf_weekly = df_yf_weekly.groupby("_weekStart").agg(
|
||||
Open=("Open", "first"),
|
||||
Close=("Close", "last"),
|
||||
AdjClose=("Adj Close", "last"),
|
||||
Low=("Low", "min"),
|
||||
High=("High", "max"),
|
||||
Volume=("Volume", "sum"),
|
||||
Dividends=("Dividends", "sum"),
|
||||
StockSplits=("Stock Splits", "prod")).rename(columns={"StockSplits":"Stock Splits","AdjClose":"Adj Close"})
|
||||
df_yf_weekly.loc[df_yf_weekly["Stock Splits"]==1,"Stock Splits"]=0
|
||||
if df_yf_weekly.index[0] not in df_daily.index:
|
||||
# Exchange closed Monday. In this case, Yahoo sets Open to last week close
|
||||
df_daily_last_week = dat.history(start=dt-_dt.timedelta(days=7), end=dt, interval="1d", auto_adjust=False, repair=True)
|
||||
df_yf_weekly["Open"] = df_daily_last_week["Close"][-1]
|
||||
df_yf_weekly["Low"] = _np.minimum(df_yf_weekly["Low"], df_yf_weekly["Open"])
|
||||
|
||||
# Compare fetched-weekly vs constructed-weekly:
|
||||
df_yf_weekly = df_yf_weekly[df.columns]
|
||||
try:
|
||||
# Note: Adj Close has tiny variance depending on date range requested
|
||||
data_cols = ["Open","Close","Low","High"]
|
||||
self.assertTrue(_np.equal(df.loc[dt,data_cols].values, df_yf_weekly[data_cols].iloc[0].values).all())
|
||||
self.assertLess(abs(df.loc[dt,"Adj Close"]/df_yf_weekly["Adj Close"].iloc[0] -1.0), 0.000001)
|
||||
except:
|
||||
for c in df.columns:
|
||||
if c=="Adj Close":
|
||||
fail = abs(df.loc[dt,c]/df_yf_weekly[c].iloc[0] -1.0) < 0.000001
|
||||
else:
|
||||
fail = df.loc[dt,c] != df_yf_weekly[c].iloc[0]
|
||||
if fail:
|
||||
print("dt = ",dt)
|
||||
print("df.loc[dt]:", type(df.loc[dt]))
|
||||
print(df.loc[dt].to_dict())
|
||||
print("df_yf_weekly.iloc[0]:", type(df_yf_weekly.iloc[0]))
|
||||
print(df_yf_weekly.iloc[0].to_dict())
|
||||
print("Result:", df.loc[dt,c])
|
||||
print("Answer:", df_yf_weekly[c].iloc[0])
|
||||
raise Exception("Mismatch in column '{}'".format(c))
|
||||
|
||||
|
||||
def test_repair_daily(self):
|
||||
# Sometimes, Yahoo returns prices 100x the correct value.
|
||||
# Suspect mixup between £/pence or $/cents etc.
|
||||
# E.g. ticker PNL.L
|
||||
|
||||
tkr = "PNL.L"
|
||||
start = "2020-01-01"
|
||||
end = min(_dt.date.today(), _dt.date(2023,1,1))
|
||||
dat = yf.Ticker(tkr, session=self.session)
|
||||
|
||||
data_cols = ["Low","High","Open","Close","Adj Close"]
|
||||
df_bad = dat.history(start=start, end=end, interval="1d", auto_adjust=False, repair=False)
|
||||
f_outlier = _np.where(df_bad[data_cols]>1000.0)
|
||||
indices = None
|
||||
if len(f_outlier[0])==0:
|
||||
self.skipTest("Skipping test_repair_daily() because no price 100x errors to repair")
|
||||
|
||||
# Outliers detected
|
||||
indices = []
|
||||
for i in range(len(f_outlier[0])):
|
||||
indices.append((f_outlier[0][i], f_outlier[1][i]))
|
||||
|
||||
df = dat.history(start=start, end=end, interval="1d", auto_adjust=False, repair=True)
|
||||
|
||||
# First test - no errors left
|
||||
df_data = df[data_cols].values
|
||||
for i,j in indices:
|
||||
try:
|
||||
self.assertTrue(df_data[i,j] < 1000.0)
|
||||
except:
|
||||
print("Detected uncorrected error: idx={}, {}={}".format(df.index[i], data_cols[j], df_data[i,j]))
|
||||
# print(df.iloc[i-1:i+2])
|
||||
raise
|
||||
|
||||
# Second test - all differences should be either ~1x or ~100x
|
||||
ratio = df_bad[data_cols].values/df[data_cols].values
|
||||
ratio = ratio.round(2)
|
||||
# - round near-100 ratio to 100:
|
||||
f = ratio>90
|
||||
ratio[f] = (ratio[f]/10).round().astype(int)*10 # round ratio to nearest 10
|
||||
# - now test
|
||||
f_100 = ratio==100
|
||||
f_1 = ratio==1
|
||||
self.assertTrue((f_100|f_1).all())
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
# # Run tests sequentially:
|
||||
# import inspect
|
||||
# test_src = inspect.getsource(TestPriceHistory)
|
||||
# unittest.TestLoader.sortTestMethodsUsing = lambda _, x, y: (
|
||||
# test_src.index(f"def {x}") - test_src.index(f"def {y}")
|
||||
# )
|
||||
# unittest.main(verbosity=2)
|
||||
|
||||
td.cleanup()
|
||||
|
||||
33
tests/ticker.py
Normal file
33
tests/ticker.py
Normal file
@@ -0,0 +1,33 @@
|
||||
from .context import yfinance as yf
|
||||
|
||||
import unittest
|
||||
|
||||
class TestTicker(unittest.TestCase):
|
||||
def setUp(self):
|
||||
pass
|
||||
|
||||
def tearDown(self):
|
||||
pass
|
||||
|
||||
|
||||
def test_getTz(self):
|
||||
tkrs = []
|
||||
tkrs.append("IMP.JO")
|
||||
tkrs.append("BHG.JO")
|
||||
tkrs.append("SSW.JO")
|
||||
tkrs.append("BP.L")
|
||||
tkrs.append("INTC")
|
||||
test_run = False
|
||||
for tkr in tkrs:
|
||||
# First step: remove ticker from tz-cache
|
||||
yf.utils.get_tz_cache().store(tkr, None)
|
||||
|
||||
# Test:
|
||||
dat = yf.Ticker(tkr)
|
||||
tz = dat._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
|
||||
|
||||
self.assertIsNotNone(tz)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
393
yfinance/base.py
393
yfinance/base.py
@@ -29,6 +29,8 @@ import pandas as _pd
|
||||
import numpy as _np
|
||||
import re as _re
|
||||
|
||||
from pytz import UnknownTimeZoneError
|
||||
|
||||
try:
|
||||
from urllib.parse import quote as urlencode
|
||||
except ImportError:
|
||||
@@ -100,8 +102,8 @@ class TickerBase():
|
||||
|
||||
def history(self, period="1mo", interval="1d",
|
||||
start=None, end=None, prepost=False, actions=True,
|
||||
auto_adjust=True, back_adjust=False, keepna=False,
|
||||
proxy=None, rounding=False, timeout=None, **kwargs):
|
||||
auto_adjust=True, back_adjust=False, repair=False, keepna=False,
|
||||
proxy=None, rounding=False, timeout=10, **kwargs):
|
||||
"""
|
||||
:Parameters:
|
||||
period : str
|
||||
@@ -123,6 +125,9 @@ class TickerBase():
|
||||
Adjust all OHLC automatically? Default is True
|
||||
back_adjust: bool
|
||||
Back-adjusted data to mimic true historical prices
|
||||
repair: bool
|
||||
Detect currency unit 100x mixups and attempt repair
|
||||
Default is False
|
||||
keepna: bool
|
||||
Keep NaN rows returned by Yahoo?
|
||||
Default is False
|
||||
@@ -134,33 +139,38 @@ class TickerBase():
|
||||
timeout: None or float
|
||||
If not None stops waiting for a response after given number of
|
||||
seconds. (Can also be a fraction of a second e.g. 0.01)
|
||||
Default is None.
|
||||
Default is 10 seconds.
|
||||
**kwargs: dict
|
||||
debug: bool
|
||||
Optional. If passed as False, will suppress
|
||||
error message printing to console.
|
||||
raise_errors: bool
|
||||
Optional. If True, then raise errors as
|
||||
exceptions instead of printing to console.
|
||||
"""
|
||||
|
||||
# Work with errors
|
||||
debug_mode = True
|
||||
if "debug" in kwargs and isinstance(kwargs["debug"], bool):
|
||||
debug_mode = kwargs["debug"]
|
||||
|
||||
err_msg = "No data found for this date range, symbol may be delisted"
|
||||
raise_errors = False
|
||||
if "raise_errors" in kwargs and isinstance(kwargs["raise_errors"], bool):
|
||||
raise_errors = kwargs["raise_errors"]
|
||||
|
||||
if start or period is None or period.lower() == "max":
|
||||
# Check can get TZ. Fail => probably delisted
|
||||
try:
|
||||
tz = self._get_ticker_tz()
|
||||
except KeyError as e:
|
||||
if "exchangeTimezoneName" in str(e):
|
||||
shared._DFS[self.ticker] = utils.empty_df()
|
||||
shared._ERRORS[self.ticker] = err_msg
|
||||
if "many" not in kwargs and debug_mode:
|
||||
tz = self._get_ticker_tz(debug_mode, proxy, timeout)
|
||||
if tz is None:
|
||||
# Every valid ticker has a timezone. Missing = problem
|
||||
err_msg = "No timezone found, symbol certainly delisted"
|
||||
shared._DFS[self.ticker] = utils.empty_df()
|
||||
shared._ERRORS[self.ticker] = err_msg
|
||||
if "many" not in kwargs and debug_mode:
|
||||
if raise_errors:
|
||||
raise Exception('%s: %s' % (self.ticker, err_msg))
|
||||
else:
|
||||
print('- %s: %s' % (self.ticker, err_msg))
|
||||
return utils.empty_df()
|
||||
else:
|
||||
raise
|
||||
return utils.empty_df()
|
||||
|
||||
if end is None:
|
||||
end = int(_time.time())
|
||||
@@ -215,28 +225,31 @@ class TickerBase():
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if data is None or not type(data) is dict or 'status_code' in data.keys():
|
||||
shared._DFS[self.ticker] = utils.empty_df()
|
||||
shared._ERRORS[self.ticker] = err_msg
|
||||
if "many" not in kwargs and debug_mode:
|
||||
print('- %s: %s' % (self.ticker, err_msg))
|
||||
return utils.empty_df()
|
||||
|
||||
if "chart" in data and data["chart"]["error"]:
|
||||
err_msg = "No data found for this date range, symbol may be delisted"
|
||||
fail = False
|
||||
if data is None or not type(data) is dict:
|
||||
fail = True
|
||||
elif type(data) is dict and 'status_code' in data.keys():
|
||||
err_msg += "(Yahoo status_code = {})".format(data["status_code"])
|
||||
fail = True
|
||||
elif "chart" in data and data["chart"]["error"]:
|
||||
err_msg = data["chart"]["error"]["description"]
|
||||
fail = True
|
||||
elif not "chart" in data or data["chart"]["result"] is None or not data["chart"]["result"]:
|
||||
fail = True
|
||||
elif not period is None and not "timestamp" in data["chart"]["result"][0] and not period in data["chart"]["result"][0]["meta"]["validRanges"]:
|
||||
# User provided a bad period. The minimum should be '1d', but sometimes Yahoo accepts '1h'.
|
||||
err_msg = "Period '{}' is invalid, must be one of {}".format(period, data["chart"]["result"][0]["meta"]["validRanges"])
|
||||
fail = True
|
||||
if fail:
|
||||
shared._DFS[self.ticker] = utils.empty_df()
|
||||
shared._ERRORS[self.ticker] = err_msg
|
||||
if "many" not in kwargs and debug_mode:
|
||||
print('- %s: %s' % (self.ticker, err_msg))
|
||||
return shared._DFS[self.ticker]
|
||||
|
||||
elif "chart" not in data or data["chart"]["result"] is None or \
|
||||
not data["chart"]["result"]:
|
||||
shared._DFS[self.ticker] = utils.empty_df()
|
||||
shared._ERRORS[self.ticker] = err_msg
|
||||
if "many" not in kwargs and debug_mode:
|
||||
print('- %s: %s' % (self.ticker, err_msg))
|
||||
return shared._DFS[self.ticker]
|
||||
if raise_errors:
|
||||
raise Exception('%s: %s' % (self.ticker, err_msg))
|
||||
else:
|
||||
print('%s: %s' % (self.ticker, err_msg))
|
||||
return utils.empty_df()
|
||||
|
||||
# parse quotes
|
||||
try:
|
||||
@@ -250,7 +263,10 @@ class TickerBase():
|
||||
shared._DFS[self.ticker] = utils.empty_df()
|
||||
shared._ERRORS[self.ticker] = err_msg
|
||||
if "many" not in kwargs and debug_mode:
|
||||
print('- %s: %s' % (self.ticker, err_msg))
|
||||
if raise_errors:
|
||||
raise Exception('%s: %s' % (self.ticker, err_msg))
|
||||
else:
|
||||
print('%s: %s' % (self.ticker, err_msg))
|
||||
return shared._DFS[self.ticker]
|
||||
|
||||
# 2) fix weired bug with Yahoo! - returning 60m for 30m bars
|
||||
@@ -273,6 +289,17 @@ class TickerBase():
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
tz_exchange = data["chart"]["result"][0]["meta"]["exchangeTimezoneName"]
|
||||
|
||||
# Note: ordering is important. If you change order, run the tests!
|
||||
quotes = utils.fix_Yahoo_returning_live_separate(quotes, params["interval"], tz_exchange)
|
||||
quotes = utils.set_df_tz(quotes, params["interval"], tz_exchange)
|
||||
quotes = utils.fix_Yahoo_dst_issue(quotes, params["interval"])
|
||||
if repair:
|
||||
# Do this before auto/back adjust
|
||||
quotes = self._fix_unit_mixups(quotes, interval, tz_exchange)
|
||||
|
||||
# Auto/back adjust
|
||||
try:
|
||||
if auto_adjust:
|
||||
quotes = utils.auto_adjust(quotes)
|
||||
@@ -286,7 +313,10 @@ class TickerBase():
|
||||
shared._DFS[self.ticker] = utils.empty_df()
|
||||
shared._ERRORS[self.ticker] = err_msg
|
||||
if "many" not in kwargs and debug_mode:
|
||||
print('- %s: %s' % (self.ticker, err_msg))
|
||||
if raise_errors:
|
||||
raise Exception('%s: %s' % (self.ticker, err_msg))
|
||||
else:
|
||||
print('%s: %s' % (self.ticker, err_msg))
|
||||
|
||||
if rounding:
|
||||
quotes = _np.round(quotes, data[
|
||||
@@ -295,27 +325,41 @@ class TickerBase():
|
||||
|
||||
# actions
|
||||
dividends, splits = utils.parse_actions(data["chart"]["result"][0])
|
||||
if start is not None:
|
||||
startDt = _pd.to_datetime(_datetime.datetime.utcfromtimestamp(start))
|
||||
if dividends is not None:
|
||||
dividends = dividends[dividends.index>=startDt]
|
||||
if splits is not None:
|
||||
splits = splits[splits.index>=startDt]
|
||||
if end is not None:
|
||||
endDt = _pd.to_datetime(_datetime.datetime.utcfromtimestamp(end))
|
||||
if dividends is not None:
|
||||
dividends = dividends[dividends.index<endDt]
|
||||
if splits is not None:
|
||||
splits = splits[splits.index<endDt]
|
||||
if splits is not None:
|
||||
splits = utils.set_df_tz(splits, interval, tz_exchange)
|
||||
if dividends is not None:
|
||||
dividends = utils.set_df_tz(dividends, interval, tz_exchange)
|
||||
|
||||
tz_exchange = data["chart"]["result"][0]["meta"]["exchangeTimezoneName"]
|
||||
|
||||
# combine
|
||||
df = _pd.concat([quotes, dividends, splits], axis=1, sort=True)
|
||||
df["Dividends"].fillna(0, inplace=True)
|
||||
df["Stock Splits"].fillna(0, inplace=True)
|
||||
|
||||
# index eod/intraday
|
||||
df.index = df.index.tz_localize("UTC").tz_convert(tz_exchange)
|
||||
|
||||
df = utils.fix_Yahoo_dst_issue(df, params["interval"])
|
||||
|
||||
if params["interval"][-1] == "m":
|
||||
df.index.name = "Datetime"
|
||||
elif params["interval"] == "1h":
|
||||
pass
|
||||
# Combine
|
||||
df = quotes.sort_index()
|
||||
if dividends.shape[0] > 0:
|
||||
df = utils.safe_merge_dfs(df, dividends, interval)
|
||||
if "Dividends" in df.columns:
|
||||
df.loc[df["Dividends"].isna(),"Dividends"] = 0
|
||||
else:
|
||||
df["Dividends"] = 0.0
|
||||
if splits.shape[0] > 0:
|
||||
df = utils.safe_merge_dfs(df, splits, interval)
|
||||
if "Stock Splits" in df.columns:
|
||||
df.loc[df["Stock Splits"].isna(),"Stock Splits"] = 0
|
||||
else:
|
||||
df["Stock Splits"] = 0.0
|
||||
|
||||
if params["interval"][-1] in ("m",'h'):
|
||||
df.index.name = "Datetime"
|
||||
else:
|
||||
# If a midnight is during DST transition hour when clocks roll back,
|
||||
# meaning clock hits midnight twice, then use the 2nd (ambiguous=True)
|
||||
df.index = _pd.to_datetime(df.index.date).tz_localize(tz_exchange, ambiguous=True)
|
||||
df.index.name = "Date"
|
||||
|
||||
# duplicates and missing rows cleanup
|
||||
@@ -331,22 +375,229 @@ class TickerBase():
|
||||
|
||||
# ------------------------
|
||||
|
||||
def _get_ticker_tz(self):
|
||||
if not self._tz is None:
|
||||
def _fix_unit_mixups(self, df, interval, tz_exchange):
|
||||
# Sometimes Yahoo returns few prices in cents/pence instead of $/£
|
||||
# I.e. 100x bigger
|
||||
# Easy to detect and fix, just look for outliers = ~100x local median
|
||||
|
||||
if df.shape[0] == 0:
|
||||
return df
|
||||
if df.shape[0] == 1:
|
||||
# Need multiple rows to confidently identify outliers
|
||||
return df
|
||||
|
||||
if df.index.tz is None:
|
||||
df.index = df.index.tz_localize(tz_exchange)
|
||||
else:
|
||||
df.index = df.index.tz_convert(tz_exchange)
|
||||
|
||||
# Only import scipy if users actually want function. To avoid
|
||||
# adding it to dependencies.
|
||||
from scipy import ndimage as _ndimage
|
||||
|
||||
data_cols = ["Open","High","Low","Close"]
|
||||
data_cols = [c for c in data_cols if c in df.columns]
|
||||
n = df.shape[0]
|
||||
median = _ndimage.median_filter(df[data_cols].values, size=(3,3), mode='mirror')
|
||||
|
||||
if (median==0).any():
|
||||
raise Exception("median contains zeroes, why?")
|
||||
ratio = df[data_cols].values/median
|
||||
# ratio_rounded = (ratio/5).round()*5 # round ratio to nearest 5
|
||||
ratio_rounded = (ratio/10).round()*10 # round ratio to nearest 10
|
||||
f = (ratio_rounded)==100
|
||||
|
||||
# Store each mixup:
|
||||
mixups = {}
|
||||
for j in range(len(data_cols)):
|
||||
fj = f[:,j]
|
||||
if fj.any():
|
||||
dc = data_cols[j]
|
||||
for i in _np.where(fj)[0]:
|
||||
idx = df.index[i]
|
||||
if idx not in mixups:
|
||||
mixups[idx] = {"data":df.loc[idx,data_cols], "fields":set([dc])}
|
||||
else:
|
||||
mixups[idx]["fields"].add(dc)
|
||||
n_mixups = len(mixups)
|
||||
|
||||
if len(mixups) > 0:
|
||||
# Problem with Yahoo's mixup is they calculate high & low after, so they can be corrupted.
|
||||
# If interval is weekly then can correct with daily. But if smaller intervals then
|
||||
# restricted to recent times:
|
||||
# - daily = hourly restricted to last 730 days
|
||||
sub_interval = None
|
||||
td_range = None
|
||||
if interval == "1wk":
|
||||
# Correct by fetching week of daily data
|
||||
sub_interval = "1d"
|
||||
td_range = _datetime.timedelta(days=7)
|
||||
elif interval == "1d":
|
||||
# Correct by fetching day of hourly data
|
||||
sub_interval = "1h"
|
||||
td_range = _datetime.timedelta(days=1)
|
||||
else:
|
||||
print("WARNING: Have not implemented repair for '{}' interval. Contact developers".format(interval))
|
||||
return df
|
||||
|
||||
# This first pass will correct all errors in Open/Close/Adj Close columns.
|
||||
# It will also *attempt* to correct Low/High columns, but only if can get price data.
|
||||
for idx in sorted(list(mixups.keys())):
|
||||
m = mixups[idx]
|
||||
# Although only some fields in row exhibit 100x error, normally the other fields are also corrupted,
|
||||
# so need to recalculate all fields in row.
|
||||
|
||||
if td_range is None:
|
||||
raise Exception("was hoping this wouldn't happen")
|
||||
|
||||
start = idx.date()
|
||||
if sub_interval=="1h" and (_datetime.date.today()-start) > _datetime.timedelta(days=729):
|
||||
# Don't bother requesting more price data, Yahoo will reject
|
||||
pass
|
||||
else:
|
||||
if sub_interval=="1h":
|
||||
df_fine = self.history(start=idx.date(), end=idx.date()+td_range, interval=sub_interval, auto_adjust=False)
|
||||
else:
|
||||
df_fine = self.history(start=idx.date()-td_range, end=idx.date()+td_range, interval=sub_interval, auto_adjust=False)
|
||||
|
||||
# First, check whether df_fine has different split-adjustment than df.
|
||||
# If it is different, then adjust df_fine to match df
|
||||
good_fields = list(set(data_cols)-m["fields"])
|
||||
median = df.loc[idx,good_fields].median()
|
||||
median_fine = _np.median(df_fine[good_fields].values)
|
||||
ratio = round(median/median_fine, 1)
|
||||
ratio_rcp = round(median_fine/median, 1)
|
||||
if ratio==1 and ratio_rcp==1:
|
||||
# Good!
|
||||
pass
|
||||
else:
|
||||
if ratio>1:
|
||||
# data has different split-adjustment than fine-grained data
|
||||
# Adjust fine-grained to match
|
||||
df_fine[data_cols] *= ratio
|
||||
elif ratio_rcp>1:
|
||||
# data has different split-adjustment than fine-grained data
|
||||
# Adjust fine-grained to match
|
||||
df_fine[data_cols] *= 1.0/ratio_rcp
|
||||
median_fine = _np.median(df_fine[good_fields].values)
|
||||
ratio = round(median/median_fine, 1)
|
||||
ratio_rcp = round(median_fine/median, 1)
|
||||
|
||||
if sub_interval != "1h":
|
||||
# dt_before_week = df_fine.index[df_fine.index.get_loc(idx)-1]
|
||||
df_last_week = df_fine[df_fine.index<idx]
|
||||
df_fine = df_fine[df_fine.index>=idx]
|
||||
|
||||
if "High" in m["fields"]:
|
||||
df.loc[idx, "High"] = df_fine["High"].max()
|
||||
m["fields"].remove("High")
|
||||
if "Low" in m["fields"]:
|
||||
df.loc[idx, "Low"] = df_fine["Low"].min()
|
||||
m["fields"].remove("Low")
|
||||
if "Open" in m["fields"]:
|
||||
if sub_interval != "1h" and idx != df_fine.index[0]:
|
||||
# Exchange closed Monday. In this case, Yahoo sets Open to last week close
|
||||
df.loc[idx, "Open"] = df_last_week["Close"][-1]
|
||||
df.loc[idx, "Low"] = min(df.loc[idx, "Open"], df.loc[idx, "Low"])
|
||||
else:
|
||||
df.loc[idx, "Open"] = df_fine["Open"].iloc[0]
|
||||
m["fields"].remove("Open")
|
||||
if "Close" in m["fields"]:
|
||||
df.loc[idx, "Close"] = df_fine["Close"].iloc[-1]
|
||||
m["fields"].remove("Close")
|
||||
# Assume 'Adj Close' also corrupted, easier than detecting whether true
|
||||
df.loc[idx, "Adj Close"] = df_fine["Adj Close"].iloc[-1]
|
||||
|
||||
if len(m["fields"])==0:
|
||||
del mixups[idx]
|
||||
|
||||
# This second pass will *crudely* "fix" any remaining errors in High/Low
|
||||
# simply by ensuring they don't contradict e.g. Low = 100x High
|
||||
if len(mixups)>0:
|
||||
for idx in sorted(list(mixups.keys())):
|
||||
m = mixups[idx]
|
||||
row = df.loc[idx,["Open","Close"]]
|
||||
if "High" in m["fields"]:
|
||||
df.loc[idx,"High"] = row.max()
|
||||
m["fields"].remove("High")
|
||||
if "Low" in m["fields"]:
|
||||
df.loc[idx,"Low"] = row.min()
|
||||
m["fields"].remove("Low")
|
||||
|
||||
if len(m["fields"])==0:
|
||||
del mixups[idx]
|
||||
|
||||
n_fixed = n_mixups - len(mixups)
|
||||
print("{}: fixed {} currency unit mixups in {} price data".format(self.ticker, n_fixed, interval))
|
||||
if len(mixups)>0:
|
||||
print(" ... and failed to correct {}".format(len(mixups)))
|
||||
|
||||
return df
|
||||
|
||||
|
||||
def _get_ticker_tz(self, debug_mode, proxy, timeout):
|
||||
if self._tz is not None:
|
||||
return self._tz
|
||||
cache = utils.get_tz_cache()
|
||||
tz = cache.lookup(self.ticker)
|
||||
|
||||
tkr_tz = utils.cache_lookup_tkr_tz(self.ticker)
|
||||
if tkr_tz is None:
|
||||
tkr_tz = self.info["exchangeTimezoneName"]
|
||||
# info fetch is relatively slow so cache timezone
|
||||
try:
|
||||
utils.cache_store_tkr_tz(self.ticker, tkr_tz)
|
||||
except PermissionError:
|
||||
# System probably read-only, so cannot cache
|
||||
pass
|
||||
if tz and not utils.is_valid_timezone(tz):
|
||||
# Clear from cache and force re-fetch
|
||||
cache.store(self.ticker, None)
|
||||
tz = None
|
||||
|
||||
self._tz = tkr_tz
|
||||
return tkr_tz
|
||||
if tz is None:
|
||||
tz = self._fetch_ticker_tz(debug_mode, proxy, timeout)
|
||||
|
||||
if utils.is_valid_timezone(tz):
|
||||
# info fetch is relatively slow so cache timezone
|
||||
cache.store(self.ticker, tz)
|
||||
else:
|
||||
tz = None
|
||||
|
||||
self._tz = tz
|
||||
return tz
|
||||
|
||||
|
||||
def _fetch_ticker_tz(self, debug_mode, proxy, timeout):
|
||||
# Query Yahoo for basic price data just to get returned timezone
|
||||
|
||||
params = {"range":"1d", "interval":"1d"}
|
||||
|
||||
# setup proxy in requests format
|
||||
if proxy is not None:
|
||||
if isinstance(proxy, dict) and "https" in proxy:
|
||||
proxy = proxy["https"]
|
||||
proxy = {"https": proxy}
|
||||
|
||||
# Getting data from json
|
||||
url = "{}/v8/finance/chart/{}".format(self._base_url, self.ticker)
|
||||
|
||||
session = self.session or _requests
|
||||
try:
|
||||
data = session.get(url=url, params=params, proxies=proxy, headers=utils.user_agent_headers, timeout=timeout)
|
||||
data = data.json()
|
||||
except Exception as e:
|
||||
if debug_mode:
|
||||
print("Failed to get ticker '{}' reason: {}".format(self.ticker, e))
|
||||
return None
|
||||
else:
|
||||
error = data.get('chart', {}).get('error', None)
|
||||
if error:
|
||||
# explicit error from yahoo API
|
||||
if debug_mode:
|
||||
print("Got error from yahoo api for ticker {}, Error: {}".format(self.ticker, error))
|
||||
else:
|
||||
try:
|
||||
return data["chart"]["result"][0]["meta"]["exchangeTimezoneName"]
|
||||
except Exception as err:
|
||||
if debug_mode:
|
||||
print("Could not get exchangeTimezoneName for ticker '{}' reason: {}".format(self.ticker, err))
|
||||
print("Got response: ")
|
||||
print("-------------")
|
||||
print(" {}".format(data))
|
||||
print("-------------")
|
||||
return None
|
||||
|
||||
def _get_info(self, proxy=None):
|
||||
# setup proxy in requests format
|
||||
@@ -355,10 +606,8 @@ class TickerBase():
|
||||
proxy = proxy["https"]
|
||||
proxy = {"https": proxy}
|
||||
|
||||
if (self._info is None) or (self._sustainability is None) or (self._recommendations is None):
|
||||
## Need to fetch
|
||||
pass
|
||||
else:
|
||||
if (self._info is not None) or (self._sustainability is not None) or (self._recommendations):
|
||||
# No need to fetch
|
||||
return
|
||||
|
||||
ticker_url = "{}/{}".format(self._scrape_url, self.ticker)
|
||||
@@ -929,7 +1178,7 @@ class TickerBase():
|
||||
dates[cn] = _pd.to_datetime(dates[cn], format="%b %d, %Y, %I %p")
|
||||
# - instead of attempting decoding of ambiguous timezone abbreviation, just use 'info':
|
||||
dates[cn] = dates[cn].dt.tz_localize(
|
||||
tz=self.info["exchangeTimezoneName"])
|
||||
tz=self.get_info()["exchangeTimezoneName"])
|
||||
|
||||
dates = dates.set_index("Earnings Date")
|
||||
|
||||
|
||||
@@ -30,9 +30,9 @@ from . import shared
|
||||
|
||||
|
||||
def download(tickers, start=None, end=None, actions=False, threads=True, ignore_tz=True,
|
||||
group_by='column', auto_adjust=False, back_adjust=False, keepna=False,
|
||||
group_by='column', auto_adjust=False, back_adjust=False, repair=False, keepna=False,
|
||||
progress=True, period="max", show_errors=True, interval="1d", prepost=False,
|
||||
proxy=None, rounding=False, timeout=None, **kwargs):
|
||||
proxy=None, rounding=False, timeout=10, **kwargs):
|
||||
"""Download yahoo tickers
|
||||
:Parameters:
|
||||
tickers : str, list
|
||||
@@ -56,6 +56,9 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
|
||||
Default is False
|
||||
auto_adjust: bool
|
||||
Adjust all OHLC automatically? Default is False
|
||||
repair: bool
|
||||
Detect currency unit 100x mixups and attempt repair
|
||||
Default is False
|
||||
keepna: bool
|
||||
Keep NaN rows returned by Yahoo?
|
||||
Default is False
|
||||
@@ -111,7 +114,7 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
|
||||
_download_one_threaded(ticker, period=period, interval=interval,
|
||||
start=start, end=end, prepost=prepost,
|
||||
actions=actions, auto_adjust=auto_adjust,
|
||||
back_adjust=back_adjust, keepna=keepna,
|
||||
back_adjust=back_adjust, repair=repair, keepna=keepna,
|
||||
progress=(progress and i > 0), proxy=proxy,
|
||||
rounding=rounding, timeout=timeout)
|
||||
while len(shared._DFS) < len(tickers):
|
||||
@@ -123,7 +126,8 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
|
||||
data = _download_one(ticker, period=period, interval=interval,
|
||||
start=start, end=end, prepost=prepost,
|
||||
actions=actions, auto_adjust=auto_adjust,
|
||||
back_adjust=back_adjust, keepna=keepna, proxy=proxy,
|
||||
back_adjust=back_adjust, repair=repair, keepna=keepna,
|
||||
proxy=proxy,
|
||||
rounding=rounding, timeout=timeout)
|
||||
shared._DFS[ticker.upper()] = data
|
||||
if progress:
|
||||
@@ -191,12 +195,12 @@ def _realign_dfs():
|
||||
|
||||
@_multitasking.task
|
||||
def _download_one_threaded(ticker, start=None, end=None,
|
||||
auto_adjust=False, back_adjust=False,
|
||||
auto_adjust=False, back_adjust=False, repair=False,
|
||||
actions=False, progress=True, period="max",
|
||||
interval="1d", prepost=False, proxy=None,
|
||||
keepna=False, rounding=False, timeout=None):
|
||||
keepna=False, rounding=False, timeout=10):
|
||||
|
||||
data = _download_one(ticker, start, end, auto_adjust, back_adjust,
|
||||
data = _download_one(ticker, start, end, auto_adjust, back_adjust, repair,
|
||||
actions, period, interval, prepost, proxy, rounding,
|
||||
keepna, timeout)
|
||||
shared._DFS[ticker.upper()] = data
|
||||
@@ -205,14 +209,14 @@ def _download_one_threaded(ticker, start=None, end=None,
|
||||
|
||||
|
||||
def _download_one(ticker, start=None, end=None,
|
||||
auto_adjust=False, back_adjust=False,
|
||||
auto_adjust=False, back_adjust=False, repair=False,
|
||||
actions=False, period="max", interval="1d",
|
||||
prepost=False, proxy=None, rounding=False,
|
||||
keepna=False, timeout=None):
|
||||
keepna=False, timeout=10):
|
||||
|
||||
return Ticker(ticker).history(period=period, interval=interval,
|
||||
start=start, end=end, prepost=prepost,
|
||||
actions=actions, auto_adjust=auto_adjust,
|
||||
back_adjust=back_adjust, proxy=proxy,
|
||||
back_adjust=back_adjust, repair=repair, proxy=proxy,
|
||||
rounding=rounding, keepna=keepna, many=True,
|
||||
timeout=timeout)
|
||||
|
||||
@@ -46,27 +46,31 @@ class Tickers():
|
||||
|
||||
def history(self, period="1mo", interval="1d",
|
||||
start=None, end=None, prepost=False,
|
||||
actions=True, auto_adjust=True, proxy=None,
|
||||
actions=True, auto_adjust=True, repair=False,
|
||||
proxy=None,
|
||||
threads=True, group_by='column', progress=True,
|
||||
timeout=None, **kwargs):
|
||||
timeout=10, **kwargs):
|
||||
|
||||
return self.download(
|
||||
period, interval,
|
||||
start, end, prepost,
|
||||
actions, auto_adjust, proxy,
|
||||
actions, auto_adjust, repair,
|
||||
proxy,
|
||||
threads, group_by, progress,
|
||||
timeout, **kwargs)
|
||||
|
||||
def download(self, period="1mo", interval="1d",
|
||||
start=None, end=None, prepost=False,
|
||||
actions=True, auto_adjust=True, proxy=None,
|
||||
actions=True, auto_adjust=True, repair=False,
|
||||
proxy=None,
|
||||
threads=True, group_by='column', progress=True,
|
||||
timeout=None, **kwargs):
|
||||
timeout=10, **kwargs):
|
||||
|
||||
data = multi.download(self.symbols,
|
||||
start=start, end=end,
|
||||
actions=actions,
|
||||
auto_adjust=auto_adjust,
|
||||
repair=repair,
|
||||
period=period,
|
||||
interval=interval,
|
||||
prepost=prepost,
|
||||
|
||||
@@ -22,6 +22,8 @@
|
||||
from __future__ import print_function
|
||||
|
||||
import datetime as _datetime
|
||||
from typing import Dict, Union
|
||||
|
||||
import pytz as _tz
|
||||
import requests as _requests
|
||||
import re as _re
|
||||
@@ -30,6 +32,12 @@ import numpy as _np
|
||||
import sys as _sys
|
||||
import os as _os
|
||||
import appdirs as _ad
|
||||
import sqlite3 as _sqlite3
|
||||
import atexit as _atexit
|
||||
|
||||
from threading import Lock
|
||||
|
||||
from pytz import UnknownTimeZoneError
|
||||
|
||||
try:
|
||||
import ujson as _json
|
||||
@@ -85,7 +93,9 @@ def get_news_by_isin(isin, proxy=None, session=None):
|
||||
return data.get('news', {})
|
||||
|
||||
|
||||
def empty_df(index=[]):
|
||||
def empty_df(index=None):
|
||||
if index is None:
|
||||
index = []
|
||||
empty = _pd.DataFrame(index=index, data={
|
||||
'Open': _np.nan, 'High': _np.nan, 'Low': _np.nan,
|
||||
'Close': _np.nan, 'Adj Close': _np.nan, 'Volume': _np.nan})
|
||||
@@ -247,11 +257,197 @@ def parse_actions(data):
|
||||
splits.sort_index(inplace=True)
|
||||
splits["Stock Splits"] = splits["numerator"] / \
|
||||
splits["denominator"]
|
||||
splits = splits["Stock Splits"]
|
||||
splits = splits[["Stock Splits"]]
|
||||
|
||||
return dividends, splits
|
||||
|
||||
|
||||
def set_df_tz(df, interval, tz):
|
||||
if df.index.tz is None:
|
||||
df.index = df.index.tz_localize("UTC")
|
||||
df.index = df.index.tz_convert(tz)
|
||||
if interval in ["1d","1w","1wk","1mo","3mo"]:
|
||||
# If localizing a midnight during DST transition hour when clocks roll back,
|
||||
# meaning clock hits midnight twice, then use the 2nd (ambiguous=True)
|
||||
df.index = _pd.to_datetime(df.index.date).tz_localize(tz, ambiguous=True)
|
||||
return df
|
||||
|
||||
|
||||
def fix_Yahoo_returning_live_separate(quotes, interval, tz_exchange):
|
||||
# Yahoo bug fix. If market is open today then Yahoo normally returns
|
||||
# todays data as a separate row from rest-of week/month interval in above row.
|
||||
# Seems to depend on what exchange e.g. crypto OK.
|
||||
# Fix = merge them together
|
||||
n = quotes.shape[0]
|
||||
if n > 1:
|
||||
dt1 = quotes.index[n-1]
|
||||
dt2 = quotes.index[n-2]
|
||||
if quotes.index.tz is None:
|
||||
dt1 = dt1.tz_localize("UTC")
|
||||
dt2 = dt2.tz_localize("UTC")
|
||||
dt1 = dt1.tz_convert(tz_exchange)
|
||||
dt2 = dt2.tz_convert(tz_exchange)
|
||||
if interval in ["1wk", "1mo", "3mo"]:
|
||||
if interval == "1wk":
|
||||
last_rows_same_interval = dt1.year==dt2.year and dt1.week==dt2.week
|
||||
elif interval == "1mo":
|
||||
last_rows_same_interval = dt1.month==dt2.month
|
||||
elif interval == "3mo":
|
||||
last_rows_same_interval = dt1.year==dt2.year and dt1.quarter==dt2.quarter
|
||||
if last_rows_same_interval:
|
||||
# Last two rows are within same interval
|
||||
idx1 = quotes.index[n-1]
|
||||
idx2 = quotes.index[n-2]
|
||||
if _np.isnan(quotes.loc[idx2,"Open"]):
|
||||
quotes.loc[idx2,"Open"] = quotes["Open"][n-1]
|
||||
# Note: nanmax() & nanmin() ignores NaNs
|
||||
quotes.loc[idx2,"High"] = _np.nanmax([quotes["High"][n-1], quotes["High"][n-2]])
|
||||
quotes.loc[idx2,"Low"] = _np.nanmin([quotes["Low"][n-1], quotes["Low"][n-2]])
|
||||
quotes.loc[idx2,"Close"] = quotes["Close"][n-1]
|
||||
if "Adj High" in quotes.columns:
|
||||
quotes.loc[idx2,"Adj High"] = _np.nanmax([quotes["Adj High"][n-1], quotes["Adj High"][n-2]])
|
||||
if "Adj Low" in quotes.columns:
|
||||
quotes.loc[idx2,"Adj Low"] = _np.nanmin([quotes["Adj Low"][n-1], quotes["Adj Low"][n-2]])
|
||||
if "Adj Close" in quotes.columns:
|
||||
quotes.loc[idx2,"Adj Close"] = quotes["Adj Close"][n-1]
|
||||
quotes.loc[idx2,"Volume"] += quotes["Volume"][n-1]
|
||||
quotes = quotes.drop(quotes.index[n-1])
|
||||
|
||||
# Similar bug in daily data except most data is simply duplicated
|
||||
# - exception is volume, *slightly* greater on final row (and matches website)
|
||||
elif interval=="1d":
|
||||
if dt1.date() == dt2.date():
|
||||
# Last two rows are on same day. Drop second-to-last row
|
||||
quotes = quotes.drop(quotes.index[n-2])
|
||||
|
||||
return quotes
|
||||
|
||||
|
||||
def safe_merge_dfs(df_main, df_sub, interval):
|
||||
# Carefully merge 'df_sub' onto 'df_main'
|
||||
# If naive merge fails, try again with reindexing df_sub:
|
||||
# 1) if interval is weekly or monthly, then try with index set to start of week/month
|
||||
# 2) if still failing then manually search through df_main.index to reindex df_sub
|
||||
|
||||
if df_sub.shape[0] == 0:
|
||||
raise Exception("No data to merge")
|
||||
|
||||
df_sub_backup = df_sub.copy()
|
||||
data_cols = [c for c in df_sub.columns if c not in df_main]
|
||||
if len(data_cols) > 1:
|
||||
raise Exception("Expected 1 data col")
|
||||
data_col = data_cols[0]
|
||||
|
||||
def _reindex_events(df, new_index, data_col_name):
|
||||
if len(new_index) == len(set(new_index)):
|
||||
# No duplicates, easy
|
||||
df.index = new_index
|
||||
return df
|
||||
|
||||
df["_NewIndex"] = new_index
|
||||
# Duplicates present within periods but can aggregate
|
||||
if data_col_name == "Dividends":
|
||||
# Add
|
||||
df = df.groupby("_NewIndex").sum()
|
||||
df.index.name = None
|
||||
elif data_col_name == "Stock Splits":
|
||||
# Product
|
||||
df = df.groupby("_NewIndex").prod()
|
||||
df.index.name = None
|
||||
else:
|
||||
raise Exception("New index contains duplicates but unsure how to aggregate for '{}'".format(data_col_name))
|
||||
if "_NewIndex" in df.columns:
|
||||
df = df.drop("_NewIndex",axis=1)
|
||||
return df
|
||||
|
||||
df = df_main.join(df_sub)
|
||||
|
||||
f_na = df[data_col].isna()
|
||||
data_lost = sum(~f_na) < df_sub.shape[0]
|
||||
if not data_lost:
|
||||
return df
|
||||
# Lost data during join()
|
||||
if interval in ["1wk","1mo","3mo"]:
|
||||
# Backdate all df_sub.index dates to start of week/month
|
||||
if interval == "1wk":
|
||||
new_index = _pd.PeriodIndex(df_sub.index, freq='W').to_timestamp()
|
||||
elif interval == "1mo":
|
||||
new_index = _pd.PeriodIndex(df_sub.index, freq='M').to_timestamp()
|
||||
elif interval == "3mo":
|
||||
new_index = _pd.PeriodIndex(df_sub.index, freq='Q').to_timestamp()
|
||||
new_index = new_index.tz_localize(df.index.tz, ambiguous=True)
|
||||
df_sub = _reindex_events(df_sub, new_index, data_col)
|
||||
df = df_main.join(df_sub)
|
||||
|
||||
f_na = df[data_col].isna()
|
||||
data_lost = sum(~f_na) < df_sub.shape[0]
|
||||
if not data_lost:
|
||||
return df
|
||||
# Lost data during join(). Manually check each df_sub.index date against df_main.index to
|
||||
# find matching interval
|
||||
df_sub = df_sub_backup.copy()
|
||||
new_index = [-1]*df_sub.shape[0]
|
||||
for i in range(df_sub.shape[0]):
|
||||
dt_sub_i = df_sub.index[i]
|
||||
if dt_sub_i in df_main.index:
|
||||
new_index[i] = dt_sub_i ; continue
|
||||
# Found a bad index date, need to search for near-match in df_main (same week/month)
|
||||
fixed = False
|
||||
for j in range(df_main.shape[0]-1):
|
||||
dt_main_j0 = df_main.index[j]
|
||||
dt_main_j1 = df_main.index[j+1]
|
||||
if (dt_main_j0 <= dt_sub_i) and (dt_sub_i < dt_main_j1):
|
||||
fixed = True
|
||||
if interval.endswith('h') or interval.endswith('m'):
|
||||
# Must also be same day
|
||||
fixed = (dt_main_j0.date() == dt_sub_i.date()) and (dt_sub_i.date() == dt_main_j1.date())
|
||||
if fixed:
|
||||
dt_sub_i = dt_main_j0 ; break
|
||||
if not fixed:
|
||||
last_main_dt = df_main.index[df_main.shape[0]-1]
|
||||
diff = dt_sub_i - last_main_dt
|
||||
if interval == "1mo" and last_main_dt.month == dt_sub_i.month:
|
||||
dt_sub_i = last_main_dt ; fixed = True
|
||||
elif interval == "3mo" and last_main_dt.year == dt_sub_i.year and last_main_dt.quarter == dt_sub_i.quarter:
|
||||
dt_sub_i = last_main_dt ; fixed = True
|
||||
elif interval == "1wk":
|
||||
if last_main_dt.week == dt_sub_i.week:
|
||||
dt_sub_i = last_main_dt ; fixed = True
|
||||
elif (dt_sub_i>=last_main_dt) and (dt_sub_i-last_main_dt < _datetime.timedelta(weeks=1)):
|
||||
# With some specific start dates (e.g. around early Jan), Yahoo
|
||||
# messes up start-of-week, is Saturday not Monday. So check
|
||||
# if same week another way
|
||||
dt_sub_i = last_main_dt ; fixed = True
|
||||
elif interval == "1d" and last_main_dt.day == dt_sub_i.day:
|
||||
dt_sub_i = last_main_dt ; fixed = True
|
||||
elif interval == "1h" and last_main_dt.hour == dt_sub_i.hour:
|
||||
dt_sub_i = last_main_dt ; fixed = True
|
||||
elif interval.endswith('m') or interval.endswith('h'):
|
||||
td = _pd.to_timedelta(interval)
|
||||
if (dt_sub_i>=last_main_dt) and (dt_sub_i-last_main_dt < td):
|
||||
dt_sub_i = last_main_dt ; fixed = True
|
||||
new_index[i] = dt_sub_i
|
||||
df_sub = _reindex_events(df_sub, new_index, data_col)
|
||||
df = df_main.join(df_sub)
|
||||
|
||||
f_na = df[data_col].isna()
|
||||
data_lost = sum(~f_na) < df_sub.shape[0]
|
||||
if data_lost:
|
||||
## Not always possible to match events with trading, e.g. when released pre-market.
|
||||
## So have to append to bottom with nan prices.
|
||||
## But should only be impossible with intra-day price data.
|
||||
if interval.endswith('m') or interval.endswith('h'):
|
||||
f_missing = ~df_sub.index.isin(df.index)
|
||||
df_sub_missing = df_sub[f_missing]
|
||||
keys = set(["Adj Open", "Open", "Adj High", "High", "Adj Low", "Low", "Adj Close", "Close"]).intersection(df.columns)
|
||||
df_sub_missing[list(keys)] = _np.nan
|
||||
df = _pd.concat([df, df_sub_missing], sort=True)
|
||||
else:
|
||||
raise Exception("Lost data during merge despite all attempts to align data (see above)")
|
||||
|
||||
return df
|
||||
|
||||
|
||||
def fix_Yahoo_dst_issue(df, interval):
|
||||
if interval in ["1d","1w","1wk"]:
|
||||
# These intervals should start at time 00:00. But for some combinations of date and timezone,
|
||||
@@ -265,6 +461,14 @@ def fix_Yahoo_dst_issue(df, interval):
|
||||
return df
|
||||
|
||||
|
||||
def is_valid_timezone(tz: str) -> bool:
|
||||
try:
|
||||
_tz.timezone(tz)
|
||||
except UnknownTimeZoneError:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
class ProgressBar:
|
||||
def __init__(self, iterations, text='completed'):
|
||||
self.text = text
|
||||
@@ -315,44 +519,157 @@ class ProgressBar:
|
||||
return str(self.prog_bar)
|
||||
|
||||
|
||||
# Simple file cache of ticker->timezone:
|
||||
_cache_dp = None
|
||||
def get_cache_dirpath():
|
||||
if _cache_dp is None:
|
||||
dp = _os.path.join(_ad.user_cache_dir(), "py-yfinance")
|
||||
else:
|
||||
dp = _os.path.join(_cache_dp, "py-yfinance")
|
||||
return dp
|
||||
def set_tz_cache_location(dp):
|
||||
global _cache_dp
|
||||
_cache_dp = dp
|
||||
# ---------------------------------
|
||||
# TimeZone cache related code
|
||||
# ---------------------------------
|
||||
|
||||
def cache_lookup_tkr_tz(tkr):
|
||||
fp = _os.path.join(get_cache_dirpath(), "tkr-tz.csv")
|
||||
if not _os.path.isfile(fp):
|
||||
class _KVStore:
|
||||
"""Simpel Sqlite backed key/value store, key and value are strings. Should be thread safe."""
|
||||
|
||||
def __init__(self, filename):
|
||||
self._cache_mutex = Lock()
|
||||
with self._cache_mutex:
|
||||
self.conn = _sqlite3.connect(filename, timeout=10, check_same_thread=False)
|
||||
self.conn.execute('pragma journal_mode=wal')
|
||||
self.conn.execute('create table if not exists "kv" (key TEXT primary key, value TEXT) without rowid')
|
||||
self.conn.commit()
|
||||
_atexit.register(self.close)
|
||||
|
||||
def close(self):
|
||||
if self.conn is not None:
|
||||
with self._cache_mutex:
|
||||
self.conn.close()
|
||||
self.conn = None
|
||||
|
||||
def get(self, key: str) -> Union[str, None]:
|
||||
"""Get value for key if it exists else returns None"""
|
||||
item = self.conn.execute('select value from "kv" where key=?', (key,))
|
||||
if item:
|
||||
return next(item, (None,))[0]
|
||||
|
||||
def set(self, key: str, value: str) -> str:
|
||||
with self._cache_mutex:
|
||||
self.conn.execute('replace into "kv" (key, value) values (?,?)', (key, value))
|
||||
self.conn.commit()
|
||||
|
||||
def bulk_set(self, kvdata: Dict[str, str]):
|
||||
records = tuple(i for i in kvdata.items())
|
||||
with self._cache_mutex:
|
||||
self.conn.executemany('replace into "kv" (key, value) values (?,?)', records)
|
||||
self.conn.commit()
|
||||
|
||||
def delete(self, key: str):
|
||||
with self._cache_mutex:
|
||||
self.conn.execute('delete from "kv" where key=?', (key,))
|
||||
self.conn.commit()
|
||||
|
||||
|
||||
class _TzCacheException(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class _TzCache:
|
||||
"""Simple sqlite file cache of ticker->timezone"""
|
||||
|
||||
def __init__(self):
|
||||
self._tz_db = None
|
||||
self._setup_cache_folder()
|
||||
|
||||
def _setup_cache_folder(self):
|
||||
if not _os.path.isdir(self._db_dir):
|
||||
try:
|
||||
_os.makedirs(self._db_dir)
|
||||
except OSError as err:
|
||||
raise _TzCacheException("Error creating TzCache folder: '{}' reason: {}"
|
||||
.format(self._db_dir, err))
|
||||
|
||||
elif not (_os.access(self._db_dir, _os.R_OK) and _os.access(self._db_dir, _os.W_OK)):
|
||||
raise _TzCacheException("Cannot read and write in TzCache folder: '{}'"
|
||||
.format(self._db_dir, ))
|
||||
|
||||
def lookup(self, tkr):
|
||||
return self.tz_db.get(tkr)
|
||||
|
||||
def store(self, tkr, tz):
|
||||
if tz is None:
|
||||
self.tz_db.delete(tkr)
|
||||
elif self.tz_db.get(tkr) is not None:
|
||||
raise Exception("Tkr {} tz already in cache".format(tkr))
|
||||
else:
|
||||
self.tz_db.set(tkr, tz)
|
||||
|
||||
@property
|
||||
def _db_dir(self):
|
||||
global _cache_dir
|
||||
return _os.path.join(_cache_dir, "py-yfinance")
|
||||
|
||||
@property
|
||||
def tz_db(self):
|
||||
# lazy init
|
||||
if self._tz_db is None:
|
||||
self._tz_db = _KVStore(_os.path.join(self._db_dir, "tkr-tz.db"))
|
||||
self._migrate_cache_tkr_tz()
|
||||
|
||||
return self._tz_db
|
||||
|
||||
def _migrate_cache_tkr_tz(self):
|
||||
"""Migrate contents from old ticker CSV-cache to SQLite db"""
|
||||
fp = _os.path.join(self._db_dir, "tkr-tz.csv")
|
||||
if not _os.path.isfile(fp):
|
||||
return None
|
||||
df = _pd.read_csv(fp, index_col="Ticker")
|
||||
self.tz_db.bulk_set(df.to_dict()['Tz'])
|
||||
_os.remove(fp)
|
||||
|
||||
|
||||
class _TzCacheDummy:
|
||||
"""Dummy cache to use if tz cache is disabled"""
|
||||
|
||||
def lookup(self, tkr):
|
||||
return None
|
||||
|
||||
df = _pd.read_csv(fp)
|
||||
f = df["Ticker"] == tkr
|
||||
if sum(f) == 0:
|
||||
def store(self, tkr, tz):
|
||||
pass
|
||||
|
||||
@property
|
||||
def tz_db(self):
|
||||
return None
|
||||
|
||||
return df["Tz"][f].iloc[0]
|
||||
def cache_store_tkr_tz(tkr, tz):
    """Store a ticker's timezone in the legacy CSV cache.

    Deprecated: kept for backward compatibility with the old CSV-based
    cache; new code uses the SQLite-backed _TzCache instead.

    :param tkr: ticker symbol
    :param tz: timezone string to store
    :raises Exception: if the ticker already has a cached timezone
    """
    df = _pd.DataFrame({"Ticker": [tkr], "Tz": [tz]})

    dp = get_cache_dirpath()
    if not _os.path.isdir(dp):
        _os.makedirs(dp)
    fp = _os.path.join(dp, "tkr-tz.csv")
    if not _os.path.isfile(fp):
        # First entry: create the CSV file.
        df.to_csv(fp, index=False)
        return

    # Append path: refuse duplicates, then rewrite the file with the new row.
    df_all = _pd.read_csv(fp)
    f = df_all["Ticker"] == tkr
    if sum(f) > 0:
        raise Exception("Tkr {} tz already in cache".format(tkr))

    _pd.concat([df_all, df]).to_csv(fp, index=False)
def get_tz_cache():
    """
    Get the timezone cache, initializes it and creates cache folder if needed on first call.
    If folder cannot be created for some reason it will fall back to initialize a
    dummy cache with same interface as real cache.
    """
    # As this can be called from multiple threads, protect it.
    with _cache_init_lock:
        global _tz_cache
        if _tz_cache is None:
            try:
                _tz_cache = _TzCache()
            except _TzCacheException as err:
                # Cache folder unusable - warn and degrade to a no-op cache
                # rather than breaking ticker fetches.
                print("Failed to create TzCache, reason: {}".format(err))
                print("TzCache will not be used.")
                print("Tip: You can direct cache to use a different location with 'set_tz_cache_location(mylocation)'")
                _tz_cache = _TzCacheDummy()

        return _tz_cache
# Base directory for caches; overridable via set_tz_cache_location().
_cache_dir = _ad.user_cache_dir()
# Serialises first-time creation of the cache singleton across threads.
_cache_init_lock = Lock()
# Lazily-created singleton (_TzCache or _TzCacheDummy); see get_tz_cache().
_tz_cache = None
def set_tz_cache_location(cache_dir: str):
    """
    Sets the path to create the "py-yfinance" cache folder in.
    Useful if the default folder returned by "appdir.user_cache_dir()" is not writable.
    Must be called before cache is used (that is, before fetching tickers).
    :param cache_dir: Path to use for caches
    :return: None
    """
    global _cache_dir, _tz_cache
    # Relocating after the cache exists would be silently ignored, so refuse it.
    assert _tz_cache is None, "Time Zone cache already initialized, setting path must be done before cache is created"
    _cache_dir = cache_dir
@@ -1 +1 @@
|
||||
version = "0.1.81"
|
||||
version = "0.2.0rc1"
|
||||
|
||||
Reference in New Issue
Block a user