Compare commits
65 Commits
0.2.5
...
feature/se
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
efd278a3e0 | ||
|
|
4d8ca3777a | ||
|
|
14c6136699 | ||
|
|
b462836540 | ||
|
|
645cc19037 | ||
|
|
86d6acccf7 | ||
|
|
4fa32a98ed | ||
|
|
35f4071c0b | ||
|
|
86b00091a9 | ||
|
|
2a2928b4a0 | ||
|
|
d47133e5bf | ||
|
|
8f0c58dafa | ||
|
|
27a721c7dd | ||
|
|
3e964d5319 | ||
|
|
84a31ae0b4 | ||
|
|
891b533ec2 | ||
|
|
b9fb3e4979 | ||
|
|
09342982a4 | ||
|
|
da8c49011e | ||
|
|
b805f0a010 | ||
|
|
5b0feb3d20 | ||
|
|
c3d7449844 | ||
|
|
a4f11b0243 | ||
|
|
464b3333d7 | ||
|
|
685f2ec351 | ||
|
|
aad46baf28 | ||
|
|
af5f96f97e | ||
|
|
a4bdaea888 | ||
|
|
ac5a9d2793 | ||
|
|
b17ad32a47 | ||
|
|
af39855e28 | ||
|
|
ac6e047f0d | ||
|
|
1e24337f29 | ||
|
|
2cc82ae12f | ||
|
|
d11f385049 | ||
|
|
7377611e1f | ||
|
|
f3b5fb85c9 | ||
|
|
a4faef83ac | ||
|
|
e1184f745b | ||
|
|
fe630008e9 | ||
|
|
b43072cf0a | ||
|
|
ad3f4cabc9 | ||
|
|
f70567872c | ||
|
|
a8ade72113 | ||
|
|
1dcc8c9c8b | ||
|
|
dd5462b307 | ||
|
|
e39c03e8e3 | ||
|
|
9297504b84 | ||
|
|
3971115ab9 | ||
|
|
b5badbbc61 | ||
|
|
ba8621f5be | ||
|
|
39c1ecc7a2 | ||
|
|
eb6d830e2a | ||
|
|
2b0ae5a6c1 | ||
|
|
1636839b67 | ||
|
|
65b97d024b | ||
|
|
197d2968e3 | ||
|
|
7460dbea17 | ||
|
|
b49fd797fc | ||
|
|
0ba810fda5 | ||
|
|
8b9faf15b3 | ||
|
|
71362f2252 | ||
|
|
287cb0786e | ||
|
|
0840b602b4 | ||
|
|
6c0b4ddb7b |
@@ -1,6 +1,19 @@
|
||||
Change Log
|
||||
===========
|
||||
|
||||
0.2.9
|
||||
-----
|
||||
- Fix fast_info bugs #1362
|
||||
|
||||
0.2.7
|
||||
-----
|
||||
- Fix Yahoo decryption, smarter this time #1353
|
||||
- Rename basic_info -> fast_info #1354
|
||||
|
||||
0.2.6
|
||||
-----
|
||||
- Fix Ticker.basic_info lazy-loading #1342
|
||||
|
||||
0.2.5
|
||||
-----
|
||||
- Fix Yahoo data decryption again #1336
|
||||
|
||||
73
README.md
73
README.md
@@ -42,12 +42,10 @@ Yahoo! finance API is intended for personal use only.**
|
||||
|
||||
---
|
||||
|
||||
## What's new in version 0.2
|
||||
## News [2023-01-27]
|
||||
Since December 2022, Yahoo has been encrypting the web data that `yfinance` scrapes for non-market data. Fortunately the decryption keys are available, although Yahoo has moved/changed them several times, which is why `yfinance` broke several times. `yfinance` is now better prepared for any future changes by Yahoo.
|
||||
|
||||
- Optimised web scraping
|
||||
- All 3 financials tables now match website so expect keys to change. If you really want old tables, use [`Ticker.get_[income_stmt|balance_sheet|cashflow](legacy=True, ...)`](https://github.com/ranaroussi/yfinance/blob/85783da515761a145411d742c2a8a3c1517264b0/yfinance/base.py#L968)
|
||||
- price data improvements: fix bug NaN rows with dividend; new repair feature for missing or 100x prices `download(repair=True)`; new attribute `Ticker.history_metadata`
|
||||
[See release notes for full list of changes](https://github.com/ranaroussi/yfinance/releases/tag/0.2.1)
|
||||
Why is Yahoo doing this? We don't know. Is it to stop scrapers? Maybe, so we've implemented changes to reduce load on Yahoo. In December we rolled out version 0.2 with optimised scraping. Then in 0.2.6 we introduced `Ticker.fast_info`, providing much faster access to some `info` elements wherever possible (e.g. price stats) and forcing users to switch (sorry, but we think it's necessary). `info` will continue to exist for as long as there are elements without a fast alternative.
|
||||
|
||||
## Quick Start
|
||||
|
||||
@@ -60,33 +58,28 @@ import yfinance as yf
|
||||
|
||||
msft = yf.Ticker("MSFT")
|
||||
|
||||
# fast access to subset of stock info
|
||||
msft.basic_info
|
||||
# slow access to all stock info
|
||||
# get all stock info (slow)
|
||||
msft.info
|
||||
# fast access to subset of stock info (opportunistic)
|
||||
msft.fast_info
|
||||
|
||||
# get historical market data
|
||||
hist = msft.history(period="max")
|
||||
hist = msft.history(period="1mo")
|
||||
|
||||
# show meta information about the history (requires history() to be called first)
|
||||
msft.history_metadata
|
||||
|
||||
# show actions (dividends, splits, capital gains)
|
||||
msft.actions
|
||||
|
||||
# show dividends
|
||||
msft.dividends
|
||||
|
||||
# show splits
|
||||
msft.splits
|
||||
|
||||
|
||||
# show capital gains (for mutual funds & etfs)
|
||||
msft.capital_gains
|
||||
msft.capital_gains # only for mutual funds & etfs
|
||||
|
||||
# show share count
|
||||
# - yearly summary:
|
||||
msft.shares
|
||||
msft.get_shares_full()
|
||||
# - accurate time-series count:
|
||||
msft.get_shares_full(start="2022-01-01", end=None)
|
||||
|
||||
# show financials:
|
||||
# - income statement
|
||||
@@ -100,13 +93,9 @@ msft.cashflow
|
||||
msft.quarterly_cashflow
|
||||
# see `Ticker.get_income_stmt()` for more options
|
||||
|
||||
# show major holders
|
||||
# show holders
|
||||
msft.major_holders
|
||||
|
||||
# show institutional holders
|
||||
msft.institutional_holders
|
||||
|
||||
# show mutualfund holders
|
||||
msft.mutualfund_holders
|
||||
|
||||
# show earnings
|
||||
@@ -165,19 +154,6 @@ msft.option_chain(..., proxy="PROXY_SERVER")
|
||||
...
|
||||
```
|
||||
|
||||
To use a custom `requests` session (for example to cache calls to the
|
||||
API or customize the `User-agent` header), pass a `session=` argument to
|
||||
the Ticker constructor.
|
||||
|
||||
```python
|
||||
import requests_cache
|
||||
session = requests_cache.CachedSession('yfinance.cache')
|
||||
session.headers['User-agent'] = 'my-program/1.0'
|
||||
ticker = yf.Ticker('msft', session=session)
|
||||
# The scraped response will be stored in the cache
|
||||
ticker.actions
|
||||
```
|
||||
|
||||
To initialize multiple `Ticker` objects, use
|
||||
|
||||
```python
|
||||
@@ -191,6 +167,29 @@ tickers.tickers['AAPL'].history(period="1mo")
|
||||
tickers.tickers['GOOG'].actions
|
||||
```
|
||||
|
||||
### Caching
|
||||
|
||||
Heavy users will quickly encounter Yahoo's rate limits on free use.
|
||||
A `requests` session can help by caching web requests.
|
||||
To use, pass a `session=` argument to the Ticker constructor:
|
||||
|
||||
```python
|
||||
import requests_cache
|
||||
session = requests_cache.CachedSession('yfinance.cache')
|
||||
# session.headers['User-agent'] = 'my-program/1.0' # <- Optional
|
||||
ticker = yf.Ticker('msft aapl goog', session=session)
|
||||
# The scraped response will be stored in the cache
|
||||
ticker.actions
|
||||
```
|
||||
To assist, `yfinance` removes from the cache any requests that failed to parse.
|
||||
To disable this feature call `yfinance.disable_prune_session_cache()`.
|
||||
|
||||
Add expiration to the session to prune old data:
|
||||
```python
|
||||
session = requests_cache.CachedSession('yfinance.cache', expire_after=datetime.timedelta(minutes=60))
|
||||
```
|
||||
More info here: https://requests-cache.readthedocs.io/en/stable/user_guide/expiration.html
|
||||
|
||||
### Fetching data for multiple tickers
|
||||
|
||||
```python
|
||||
@@ -227,7 +226,7 @@ data = yf.download( # or pdr.get_data_yahoo(...
|
||||
# (optional, default is False)
|
||||
auto_adjust = True,
|
||||
|
||||
# attempt repair of missing data or currency mixups e.g. $/cents
|
||||
# attempt repair of Yahoo data issues
|
||||
repair = False,
|
||||
|
||||
# download pre/post regular market hours data
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
{% set name = "yfinance" %}
|
||||
{% set version = "0.2.5" %}
|
||||
{% set version = "0.2.9" %}
|
||||
|
||||
package:
|
||||
name: "{{ name|lower }}"
|
||||
|
||||
293
tests/prices.py
293
tests/prices.py
@@ -24,14 +24,12 @@ class TestPriceHistory(unittest.TestCase):
|
||||
|
||||
def test_daily_index(self):
|
||||
tkrs = ["BHP.AX", "IMP.JO", "BP.L", "PNL.L", "INTC"]
|
||||
|
||||
intervals = ["1d", "1wk", "1mo"]
|
||||
|
||||
for tkr in tkrs:
|
||||
dat = yf.Ticker(tkr, session=self.session)
|
||||
ticker = yf.Ticker(tkr, session=self.session)
|
||||
|
||||
for interval in intervals:
|
||||
df = dat.history(period="5y", interval=interval)
|
||||
df = ticker.history(period="5y", interval=interval)
|
||||
|
||||
f = df.index.time == _dt.time(0)
|
||||
self.assertTrue(f.all())
|
||||
@@ -39,13 +37,14 @@ class TestPriceHistory(unittest.TestCase):
|
||||
def test_duplicatingHourly(self):
|
||||
tkrs = ["IMP.JO", "BHG.JO", "SSW.JO", "BP.L", "INTC"]
|
||||
for tkr in tkrs:
|
||||
dat = yf.Ticker(tkr, session=self.session)
|
||||
tz = dat._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
|
||||
ticker = yf.Ticker(tkr, session=self.session)
|
||||
tz = ticker._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
|
||||
|
||||
dt_utc = _tz.timezone("UTC").localize(_dt.datetime.utcnow())
|
||||
dt = dt_utc.astimezone(_tz.timezone(tz))
|
||||
|
||||
df = dat.history(start=dt.date() - _dt.timedelta(days=1), interval="1h")
|
||||
start_d = dt.date() - _dt.timedelta(days=7)
|
||||
df = ticker.history(start=start_d, interval="1h")
|
||||
|
||||
dt0 = df.index[-2]
|
||||
dt1 = df.index[-1]
|
||||
@@ -55,13 +54,12 @@ class TestPriceHistory(unittest.TestCase):
|
||||
print("Ticker = ", tkr)
|
||||
raise
|
||||
|
||||
|
||||
def test_duplicatingDaily(self):
|
||||
tkrs = ["IMP.JO", "BHG.JO", "SSW.JO", "BP.L", "INTC"]
|
||||
test_run = False
|
||||
for tkr in tkrs:
|
||||
dat = yf.Ticker(tkr, session=self.session)
|
||||
tz = dat._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
|
||||
ticker = yf.Ticker(tkr, session=self.session)
|
||||
tz = ticker._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
|
||||
|
||||
dt_utc = _tz.timezone("UTC").localize(_dt.datetime.utcnow())
|
||||
dt = dt_utc.astimezone(_tz.timezone(tz))
|
||||
@@ -69,7 +67,7 @@ class TestPriceHistory(unittest.TestCase):
|
||||
continue
|
||||
test_run = True
|
||||
|
||||
df = dat.history(start=dt.date() - _dt.timedelta(days=7), interval="1d")
|
||||
df = ticker.history(start=dt.date() - _dt.timedelta(days=7), interval="1d")
|
||||
|
||||
dt0 = df.index[-2]
|
||||
dt1 = df.index[-1]
|
||||
@@ -86,15 +84,15 @@ class TestPriceHistory(unittest.TestCase):
|
||||
tkrs = ['MSFT', 'IWO', 'VFINX', '^GSPC', 'BTC-USD']
|
||||
test_run = False
|
||||
for tkr in tkrs:
|
||||
dat = yf.Ticker(tkr, session=self.session)
|
||||
tz = dat._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
|
||||
ticker = yf.Ticker(tkr, session=self.session)
|
||||
tz = ticker._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
|
||||
|
||||
dt = _tz.timezone(tz).localize(_dt.datetime.now())
|
||||
if dt.date().weekday() not in [1, 2, 3, 4]:
|
||||
continue
|
||||
test_run = True
|
||||
|
||||
df = dat.history(start=dt.date() - _dt.timedelta(days=7), interval="1wk")
|
||||
df = ticker.history(start=dt.date() - _dt.timedelta(days=7), interval="1wk")
|
||||
dt0 = df.index[-2]
|
||||
dt1 = df.index[-1]
|
||||
try:
|
||||
@@ -110,22 +108,27 @@ class TestPriceHistory(unittest.TestCase):
|
||||
def test_intraDayWithEvents(self):
|
||||
# TASE dividend release pre-market, doesn't merge nicely with intra-day data so check still present
|
||||
|
||||
tkr = "ICL.TA"
|
||||
# tkr = "ESLT.TA"
|
||||
# tkr = "ONE.TA"
|
||||
# tkr = "MGDL.TA"
|
||||
start_d = _dt.date.today() - _dt.timedelta(days=60)
|
||||
end_d = None
|
||||
df_daily = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1d", actions=True)
|
||||
df_daily_divs = df_daily["Dividends"][df_daily["Dividends"] != 0]
|
||||
if df_daily_divs.shape[0] == 0:
|
||||
self.skipTest("Skipping test_intraDayWithEvents() because 'ICL.TA' has no dividend in last 60 days")
|
||||
tase_tkrs = ["ICL.TA", "ESLT.TA", "ONE.TA", "MGDL.TA"]
|
||||
test_run = False
|
||||
for tkr in tase_tkrs:
|
||||
start_d = _dt.date.today() - _dt.timedelta(days=59)
|
||||
end_d = None
|
||||
df_daily = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1d", actions=True)
|
||||
df_daily_divs = df_daily["Dividends"][df_daily["Dividends"] != 0]
|
||||
if df_daily_divs.shape[0] == 0:
|
||||
# self.skipTest("Skipping test_intraDayWithEvents() because 'ICL.TA' has no dividend in last 60 days")
|
||||
continue
|
||||
|
||||
last_div_date = df_daily_divs.index[-1]
|
||||
start_d = last_div_date.date()
|
||||
end_d = last_div_date.date() + _dt.timedelta(days=1)
|
||||
df = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="15m", actions=True)
|
||||
self.assertTrue((df["Dividends"] != 0.0).any())
|
||||
last_div_date = df_daily_divs.index[-1]
|
||||
start_d = last_div_date.date()
|
||||
end_d = last_div_date.date() + _dt.timedelta(days=1)
|
||||
df = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="15m", actions=True)
|
||||
self.assertTrue((df["Dividends"] != 0.0).any())
|
||||
test_run = True
|
||||
break
|
||||
|
||||
if not test_run:
|
||||
self.skipTest("Skipping test_intraDayWithEvents() because no tickers had a dividend in last 60 days")
|
||||
|
||||
def test_dailyWithEvents(self):
|
||||
# Reproduce issue #521
|
||||
@@ -230,7 +233,6 @@ class TestPriceHistory(unittest.TestCase):
|
||||
|
||||
def test_tz_dst_ambiguous(self):
|
||||
# Reproduce issue #1100
|
||||
|
||||
try:
|
||||
yf.Ticker("ESLT.TA", session=self.session).history(start="2002-10-06", end="2002-10-09", interval="1d")
|
||||
except _tz.exceptions.AmbiguousTimeError:
|
||||
@@ -245,36 +247,178 @@ class TestPriceHistory(unittest.TestCase):
|
||||
# The correction is successful if no days are weekend, and weekly data begins Monday
|
||||
|
||||
tkr = "AGRO3.SA"
|
||||
dat = yf.Ticker(tkr, session=self.session)
|
||||
ticker = yf.Ticker(tkr, session=self.session)
|
||||
start = "2021-01-11"
|
||||
end = "2022-11-05"
|
||||
|
||||
interval = "1d"
|
||||
df = dat.history(start=start, end=end, interval=interval)
|
||||
df = ticker.history(start=start, end=end, interval=interval)
|
||||
self.assertTrue(((df.index.weekday >= 0) & (df.index.weekday <= 4)).all())
|
||||
|
||||
interval = "1wk"
|
||||
df = dat.history(start=start, end=end, interval=interval)
|
||||
df = ticker.history(start=start, end=end, interval=interval)
|
||||
try:
|
||||
self.assertTrue((df.index.weekday == 0).all())
|
||||
except:
|
||||
print("Weekly data not aligned to Monday")
|
||||
raise
|
||||
|
||||
def test_prune_post_intraday_us(self):
|
||||
# Half-day before USA Thanksgiving. Yahoo normally
|
||||
# returns an interval starting when regular trading closes,
|
||||
# even if prepost=False.
|
||||
|
||||
# Setup
|
||||
tkr = "AMZN"
|
||||
interval = "1h"
|
||||
interval_td = _dt.timedelta(hours=1)
|
||||
time_open = _dt.time(9, 30)
|
||||
time_close = _dt.time(16)
|
||||
special_day = _dt.date(2022, 11, 25)
|
||||
time_early_close = _dt.time(13)
|
||||
dat = yf.Ticker(tkr, session=self.session)
|
||||
|
||||
# Run
|
||||
start_d = special_day - _dt.timedelta(days=7)
|
||||
end_d = special_day + _dt.timedelta(days=7)
|
||||
df = dat.history(start=start_d, end=end_d, interval=interval, prepost=False, keepna=True)
|
||||
tg_last_dt = df.loc[str(special_day)].index[-1]
|
||||
self.assertTrue(tg_last_dt.time() < time_early_close)
|
||||
|
||||
# Test no other afternoons (or mornings) were pruned
|
||||
start_d = _dt.date(special_day.year, 1, 1)
|
||||
end_d = _dt.date(special_day.year+1, 1, 1)
|
||||
df = dat.history(start=start_d, end=end_d, interval="1h", prepost=False, keepna=True)
|
||||
last_dts = _pd.Series(df.index).groupby(df.index.date).last()
|
||||
f_early_close = (last_dts+interval_td).dt.time < time_close
|
||||
early_close_dates = last_dts.index[f_early_close].values
|
||||
self.assertEqual(len(early_close_dates), 1)
|
||||
self.assertEqual(early_close_dates[0], special_day)
|
||||
|
||||
first_dts = _pd.Series(df.index).groupby(df.index.date).first()
|
||||
f_late_open = first_dts.dt.time > time_open
|
||||
late_open_dates = first_dts.index[f_late_open]
|
||||
self.assertEqual(len(late_open_dates), 0)
|
||||
|
||||
def test_prune_post_intraday_omx(self):
|
||||
# Half-day before Sweden Christmas. Yahoo normally
|
||||
# returns an interval starting when regular trading closes,
|
||||
# even if prepost=False.
|
||||
# If prepost=False, test that yfinance is removing prepost intervals.
|
||||
|
||||
# Setup
|
||||
tkr = "AEC.ST"
|
||||
interval = "1h"
|
||||
interval_td = _dt.timedelta(hours=1)
|
||||
time_open = _dt.time(9)
|
||||
time_close = _dt.time(17,30)
|
||||
special_day = _dt.date(2022, 12, 23)
|
||||
time_early_close = _dt.time(13, 2)
|
||||
dat = yf.Ticker(tkr, session=self.session)
|
||||
|
||||
# Half trading day Jan 5, Apr 14, May 25, Jun 23, Nov 4, Dec 23, Dec 30
|
||||
half_days = [_dt.date(special_day.year, x[0], x[1]) for x in [(1,5), (4,14), (5,25), (6,23), (11,4), (12,23), (12,30)]]
|
||||
|
||||
# Yahoo has incorrectly classified afternoon of 2022-04-13 as post-market.
|
||||
# Nothing yfinance can do because Yahoo doesn't return data with prepost=False.
|
||||
# But need to handle in this test.
|
||||
expected_incorrect_half_days = [_dt.date(2022,4,13)]
|
||||
half_days = sorted(half_days+expected_incorrect_half_days)
|
||||
|
||||
# Run
|
||||
start_d = special_day - _dt.timedelta(days=7)
|
||||
end_d = special_day + _dt.timedelta(days=7)
|
||||
df = dat.history(start=start_d, end=end_d, interval=interval, prepost=False, keepna=True)
|
||||
tg_last_dt = df.loc[str(special_day)].index[-1]
|
||||
self.assertTrue(tg_last_dt.time() < time_early_close)
|
||||
|
||||
# Test no other afternoons (or mornings) were pruned
|
||||
start_d = _dt.date(special_day.year, 1, 1)
|
||||
end_d = _dt.date(special_day.year+1, 1, 1)
|
||||
df = dat.history(start=start_d, end=end_d, interval="1h", prepost=False, keepna=True)
|
||||
last_dts = _pd.Series(df.index).groupby(df.index.date).last()
|
||||
f_early_close = (last_dts+interval_td).dt.time < time_close
|
||||
early_close_dates = last_dts.index[f_early_close].values
|
||||
unexpected_early_close_dates = [d for d in early_close_dates if not d in half_days]
|
||||
self.assertEqual(len(unexpected_early_close_dates), 0)
|
||||
self.assertEqual(len(early_close_dates), len(half_days))
|
||||
self.assertTrue(_np.equal(early_close_dates, half_days).all())
|
||||
|
||||
first_dts = _pd.Series(df.index).groupby(df.index.date).first()
|
||||
f_late_open = first_dts.dt.time > time_open
|
||||
late_open_dates = first_dts.index[f_late_open]
|
||||
self.assertEqual(len(late_open_dates), 0)
|
||||
|
||||
def test_prune_post_intraday_asx(self):
|
||||
# Setup
|
||||
tkr = "BHP.AX"
|
||||
interval = "1h"
|
||||
interval_td = _dt.timedelta(hours=1)
|
||||
time_open = _dt.time(10)
|
||||
time_close = _dt.time(16,12)
|
||||
# No early closes in 2022
|
||||
dat = yf.Ticker(tkr, session=self.session)
|
||||
|
||||
# Test no afternoons (or mornings) were pruned
|
||||
start_d = _dt.date(2022, 1, 1)
|
||||
end_d = _dt.date(2022+1, 1, 1)
|
||||
df = dat.history(start=start_d, end=end_d, interval="1h", prepost=False, keepna=True)
|
||||
last_dts = _pd.Series(df.index).groupby(df.index.date).last()
|
||||
f_early_close = (last_dts+interval_td).dt.time < time_close
|
||||
early_close_dates = last_dts.index[f_early_close].values
|
||||
self.assertEqual(len(early_close_dates), 0)
|
||||
|
||||
first_dts = _pd.Series(df.index).groupby(df.index.date).first()
|
||||
f_late_open = first_dts.dt.time > time_open
|
||||
late_open_dates = first_dts.index[f_late_open]
|
||||
self.assertEqual(len(late_open_dates), 0)
|
||||
|
||||
def test_weekly_2rows_fix(self):
|
||||
tkr = "AMZN"
|
||||
start = _dt.date.today() - _dt.timedelta(days=14)
|
||||
start -= _dt.timedelta(days=start.weekday())
|
||||
|
||||
dat = yf.Ticker(tkr)
|
||||
df = dat.history(start=start, interval="1wk")
|
||||
ticker = yf.Ticker(tkr)
|
||||
df = ticker.history(start=start, interval="1wk")
|
||||
self.assertTrue((df.index.weekday == 0).all())
|
||||
|
||||
class TestPriceRepair(unittest.TestCase):
|
||||
session = None
|
||||
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.session = requests_cache.CachedSession(backend='memory')
|
||||
|
||||
@classmethod
|
||||
def tearDownClass(cls):
|
||||
if cls.session is not None:
|
||||
cls.session.close()
|
||||
|
||||
def test_reconstruct_2m(self):
|
||||
# 2m repair requires 1m data.
|
||||
# Yahoo restricts 1m fetches to 7 days max within last 30 days.
|
||||
# Need to test that '_reconstruct_intervals_batch()' can handle this.
|
||||
|
||||
tkrs = ["BHP.AX", "IMP.JO", "BP.L", "PNL.L", "INTC"]
|
||||
|
||||
dt_now = _pd.Timestamp.utcnow()
|
||||
td_7d = _dt.timedelta(days=7)
|
||||
td_60d = _dt.timedelta(days=60)
|
||||
|
||||
# Round time for 'requests_cache' reuse
|
||||
dt_now = dt_now.ceil("1h")
|
||||
|
||||
for tkr in tkrs:
|
||||
dat = yf.Ticker(tkr, session=self.session)
|
||||
end_dt = dt_now
|
||||
start_dt = end_dt - td_60d
|
||||
df = dat.history(start=start_dt, end=end_dt, interval="2m", repair=True)
|
||||
|
||||
def test_repair_100x_weekly(self):
|
||||
# Setup:
|
||||
tkr = "PNL.L"
|
||||
dat = yf.Ticker(tkr, session=self.session)
|
||||
tz_exchange = dat.info["exchangeTimezoneName"]
|
||||
tz_exchange = ticker.fast_info["timezone"]
|
||||
|
||||
data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
|
||||
df = _pd.DataFrame(data={"Open": [470.5, 473.5, 474.5, 470],
|
||||
@@ -283,22 +427,22 @@ class TestPriceHistory(unittest.TestCase):
|
||||
"Close": [475, 473.5, 472, 473.5],
|
||||
"Adj Close": [475, 473.5, 472, 473.5],
|
||||
"Volume": [2295613, 2245604, 3000287, 2635611]},
|
||||
index=_pd.to_datetime([_dt.date(2022, 10, 23),
|
||||
_dt.date(2022, 10, 16),
|
||||
_dt.date(2022, 10, 9),
|
||||
_dt.date(2022, 10, 2)]))
|
||||
index=_pd.to_datetime([_dt.date(2022, 10, 24),
|
||||
_dt.date(2022, 10, 17),
|
||||
_dt.date(2022, 10, 10),
|
||||
_dt.date(2022, 10, 3)]))
|
||||
df = df.sort_index()
|
||||
df.index.name = "Date"
|
||||
df_bad = df.copy()
|
||||
df_bad.loc["2022-10-23", "Close"] *= 100
|
||||
df_bad.loc["2022-10-16", "Low"] *= 100
|
||||
df_bad.loc["2022-10-2", "Open"] *= 100
|
||||
df_bad.loc["2022-10-24", "Close"] *= 100
|
||||
df_bad.loc["2022-10-17", "Low"] *= 100
|
||||
df_bad.loc["2022-10-03", "Open"] *= 100
|
||||
df.index = df.index.tz_localize(tz_exchange)
|
||||
df_bad.index = df_bad.index.tz_localize(tz_exchange)
|
||||
|
||||
# Run test
|
||||
|
||||
df_repaired = dat._fix_unit_mixups(df_bad, "1wk", tz_exchange)
|
||||
df_repaired = ticker._fix_unit_mixups(df_bad, "1wk", tz_exchange, prepost=False)
|
||||
|
||||
# First test - no errors left
|
||||
for c in data_cols:
|
||||
@@ -325,8 +469,9 @@ class TestPriceHistory(unittest.TestCase):
|
||||
# PNL.L has a stock-split in 2022. Sometimes requesting data before 2022 is not split-adjusted.
|
||||
|
||||
tkr = "PNL.L"
|
||||
dat = yf.Ticker(tkr, session=self.session)
|
||||
tz_exchange = dat.info["exchangeTimezoneName"]
|
||||
|
||||
ticker = yf.Ticker(tkr, session=self.session)
|
||||
tz_exchange = ticker.fast_info["timezone"]
|
||||
|
||||
data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
|
||||
df = _pd.DataFrame(data={"Open": [400, 398, 392.5, 417],
|
||||
@@ -353,7 +498,7 @@ class TestPriceHistory(unittest.TestCase):
|
||||
df.index = df.index.tz_localize(tz_exchange)
|
||||
df_bad.index = df_bad.index.tz_localize(tz_exchange)
|
||||
|
||||
df_repaired = dat._fix_unit_mixups(df_bad, "1wk", tz_exchange)
|
||||
df_repaired = ticker._fix_unit_mixups(df_bad, "1wk", tz_exchange, prepost=False)
|
||||
|
||||
# First test - no errors left
|
||||
for c in data_cols:
|
||||
@@ -380,8 +525,8 @@ class TestPriceHistory(unittest.TestCase):
|
||||
|
||||
def test_repair_100x_daily(self):
|
||||
tkr = "PNL.L"
|
||||
dat = yf.Ticker(tkr, session=self.session)
|
||||
tz_exchange = dat.info["exchangeTimezoneName"]
|
||||
ticker = yf.Ticker(tkr, session=self.session)
|
||||
tz_exchange = ticker.fast_info["timezone"]
|
||||
|
||||
data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
|
||||
df = _pd.DataFrame(data={"Open": [478, 476, 476, 472],
|
||||
@@ -403,7 +548,7 @@ class TestPriceHistory(unittest.TestCase):
|
||||
df.index = df.index.tz_localize(tz_exchange)
|
||||
df_bad.index = df_bad.index.tz_localize(tz_exchange)
|
||||
|
||||
df_repaired = dat._fix_unit_mixups(df_bad, "1d", tz_exchange)
|
||||
df_repaired = ticker._fix_unit_mixups(df_bad, "1d", tz_exchange, prepost=False)
|
||||
|
||||
# First test - no errors left
|
||||
for c in data_cols:
|
||||
@@ -422,8 +567,9 @@ class TestPriceHistory(unittest.TestCase):
|
||||
|
||||
def test_repair_zeroes_daily(self):
|
||||
tkr = "BBIL.L"
|
||||
dat = yf.Ticker(tkr, session=self.session)
|
||||
tz_exchange = dat.info["exchangeTimezoneName"]
|
||||
|
||||
ticker = yf.Ticker(tkr, session=self.session)
|
||||
tz_exchange = ticker.fast_info["timezone"]
|
||||
|
||||
df_bad = _pd.DataFrame(data={"Open": [0, 102.04, 102.04],
|
||||
"High": [0, 102.1, 102.11],
|
||||
@@ -438,7 +584,7 @@ class TestPriceHistory(unittest.TestCase):
|
||||
df_bad.index.name = "Date"
|
||||
df_bad.index = df_bad.index.tz_localize(tz_exchange)
|
||||
|
||||
repaired_df = dat._fix_zeroes(df_bad, "1d", tz_exchange)
|
||||
repaired_df = ticker._fix_zeroes(df_bad, "1d", tz_exchange, prepost=False)
|
||||
|
||||
correct_df = df_bad.copy()
|
||||
correct_df.loc["2022-11-01", "Open"] = 102.080002
|
||||
@@ -449,41 +595,32 @@ class TestPriceHistory(unittest.TestCase):
|
||||
|
||||
def test_repair_zeroes_hourly(self):
|
||||
tkr = "INTC"
|
||||
dat = yf.Ticker(tkr, session=self.session)
|
||||
tz_exchange = dat.info["exchangeTimezoneName"]
|
||||
ticker = yf.Ticker(tkr, session=self.session)
|
||||
tz_exchange = ticker.fast_info["timezone"]
|
||||
|
||||
df_bad = _pd.DataFrame(data={"Open": [29.68, 29.49, 29.545, _np.nan, 29.485],
|
||||
"High": [29.68, 29.625, 29.58, _np.nan, 29.49],
|
||||
"Low": [29.46, 29.4, 29.45, _np.nan, 29.31],
|
||||
"Close": [29.485, 29.545, 29.485, _np.nan, 29.325],
|
||||
"Adj Close": [29.485, 29.545, 29.485, _np.nan, 29.325],
|
||||
"Volume": [3258528, 2140195, 1621010, 0, 0]},
|
||||
index=_pd.to_datetime([_dt.datetime(2022,11,25, 9,30),
|
||||
_dt.datetime(2022,11,25, 10,30),
|
||||
_dt.datetime(2022,11,25, 11,30),
|
||||
_dt.datetime(2022,11,25, 12,30),
|
||||
_dt.datetime(2022,11,25, 13,00)]))
|
||||
df_bad = df_bad.sort_index()
|
||||
df_bad.index.name = "Date"
|
||||
df_bad.index = df_bad.index.tz_localize(tz_exchange)
|
||||
correct_df = ticker.history(period="1wk", interval="1h", auto_adjust=False, repair=True)
|
||||
|
||||
repaired_df = dat._fix_zeroes(df_bad, "1h", tz_exchange)
|
||||
df_bad = correct_df.copy()
|
||||
bad_idx = correct_df.index[10]
|
||||
df_bad.loc[bad_idx, "Open"] = _np.nan
|
||||
df_bad.loc[bad_idx, "High"] = _np.nan
|
||||
df_bad.loc[bad_idx, "Low"] = _np.nan
|
||||
df_bad.loc[bad_idx, "Close"] = _np.nan
|
||||
df_bad.loc[bad_idx, "Adj Close"] = _np.nan
|
||||
df_bad.loc[bad_idx, "Volume"] = 0
|
||||
|
||||
repaired_df = ticker._fix_zeroes(df_bad, "1h", tz_exchange, prepost=False)
|
||||
|
||||
correct_df = df_bad.copy()
|
||||
idx = _pd.Timestamp(2022,11,25, 12,30).tz_localize(tz_exchange)
|
||||
correct_df.loc[idx, "Open"] = 29.485001
|
||||
correct_df.loc[idx, "High"] = 29.49
|
||||
correct_df.loc[idx, "Low"] = 29.43
|
||||
correct_df.loc[idx, "Close"] = 29.455
|
||||
correct_df.loc[idx, "Adj Close"] = 29.455
|
||||
correct_df.loc[idx, "Volume"] = 609164
|
||||
for c in ["Open", "Low", "High", "Close"]:
|
||||
try:
|
||||
self.assertTrue(_np.isclose(repaired_df[c], correct_df[c], rtol=1e-7).all())
|
||||
except:
|
||||
print("COLUMN", c)
|
||||
print("- repaired_df")
|
||||
print(repaired_df)
|
||||
print("- correct_df[c]:")
|
||||
print(correct_df[c])
|
||||
print("- diff:")
|
||||
print(repaired_df[c] - correct_df[c])
|
||||
raise
|
||||
|
||||
|
||||
382
tests/ticker.py
382
tests/ticker.py
@@ -44,8 +44,8 @@ class TestTicker(unittest.TestCase):
|
||||
yf.utils.get_tz_cache().store(tkr, None)
|
||||
|
||||
# Test:
|
||||
dat = yf.Ticker(tkr, session=self.session)
|
||||
tz = dat._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
|
||||
ticker = yf.Ticker(tkr, session=self.session)
|
||||
tz = ticker._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
|
||||
|
||||
self.assertIsNotNone(tz)
|
||||
|
||||
@@ -53,82 +53,245 @@ class TestTicker(unittest.TestCase):
|
||||
# Check yfinance doesn't die when ticker delisted
|
||||
|
||||
tkr = "AM2Z.TA"
|
||||
dat = yf.Ticker(tkr, session=self.session)
|
||||
dat.history(period="1wk")
|
||||
dat.history(start="2022-01-01")
|
||||
dat.history(start="2022-01-01", end="2022-03-01")
|
||||
ticker = yf.Ticker(tkr, session=self.session)
|
||||
ticker.history(period="1wk")
|
||||
ticker.history(start="2022-01-01")
|
||||
ticker.history(start="2022-01-01", end="2022-03-01")
|
||||
yf.download([tkr], period="1wk")
|
||||
dat.isin
|
||||
dat.major_holders
|
||||
dat.institutional_holders
|
||||
dat.mutualfund_holders
|
||||
dat.dividends
|
||||
dat.splits
|
||||
dat.actions
|
||||
dat.shares
|
||||
dat.get_shares_full()
|
||||
dat.info
|
||||
dat.calendar
|
||||
dat.recommendations
|
||||
dat.earnings
|
||||
dat.quarterly_earnings
|
||||
dat.income_stmt
|
||||
dat.quarterly_income_stmt
|
||||
dat.balance_sheet
|
||||
dat.quarterly_balance_sheet
|
||||
dat.cashflow
|
||||
dat.quarterly_cashflow
|
||||
dat.recommendations_summary
|
||||
dat.analyst_price_target
|
||||
dat.revenue_forecasts
|
||||
dat.sustainability
|
||||
dat.options
|
||||
dat.news
|
||||
dat.earnings_trend
|
||||
dat.earnings_dates
|
||||
dat.earnings_forecasts
|
||||
ticker.isin
|
||||
ticker.major_holders
|
||||
ticker.institutional_holders
|
||||
ticker.mutualfund_holders
|
||||
ticker.dividends
|
||||
ticker.splits
|
||||
ticker.actions
|
||||
ticker.shares
|
||||
ticker.get_shares_full()
|
||||
ticker.info
|
||||
ticker.calendar
|
||||
ticker.recommendations
|
||||
ticker.earnings
|
||||
ticker.quarterly_earnings
|
||||
ticker.income_stmt
|
||||
ticker.quarterly_income_stmt
|
||||
ticker.balance_sheet
|
||||
ticker.quarterly_balance_sheet
|
||||
ticker.cashflow
|
||||
ticker.quarterly_cashflow
|
||||
ticker.recommendations_summary
|
||||
ticker.analyst_price_target
|
||||
ticker.revenue_forecasts
|
||||
ticker.sustainability
|
||||
ticker.options
|
||||
ticker.news
|
||||
ticker.earnings_trend
|
||||
ticker.earnings_dates
|
||||
ticker.earnings_forecasts
|
||||
|
||||
def test_goodTicker(self):
|
||||
# Check that yfinance works when the full API is called on the same instance of ticker
|
||||
|
||||
tkr = "IBM"
|
||||
dat = yf.Ticker(tkr, session=self.session)
|
||||
ticker = yf.Ticker(tkr, session=self.session)
|
||||
|
||||
dat.isin
|
||||
dat.major_holders
|
||||
dat.institutional_holders
|
||||
dat.mutualfund_holders
|
||||
dat.dividends
|
||||
dat.splits
|
||||
dat.actions
|
||||
dat.shares
|
||||
dat.get_shares_full()
|
||||
dat.info
|
||||
dat.calendar
|
||||
dat.recommendations
|
||||
dat.earnings
|
||||
dat.quarterly_earnings
|
||||
dat.income_stmt
|
||||
dat.quarterly_income_stmt
|
||||
dat.balance_sheet
|
||||
dat.quarterly_balance_sheet
|
||||
dat.cashflow
|
||||
dat.quarterly_cashflow
|
||||
dat.recommendations_summary
|
||||
dat.analyst_price_target
|
||||
dat.revenue_forecasts
|
||||
dat.sustainability
|
||||
dat.options
|
||||
dat.news
|
||||
dat.earnings_trend
|
||||
dat.earnings_dates
|
||||
dat.earnings_forecasts
|
||||
ticker.isin
|
||||
ticker.major_holders
|
||||
ticker.institutional_holders
|
||||
ticker.mutualfund_holders
|
||||
ticker.dividends
|
||||
ticker.splits
|
||||
ticker.actions
|
||||
ticker.shares
|
||||
ticker.get_shares_full()
|
||||
ticker.info
|
||||
ticker.calendar
|
||||
ticker.recommendations
|
||||
ticker.earnings
|
||||
ticker.quarterly_earnings
|
||||
ticker.income_stmt
|
||||
ticker.quarterly_income_stmt
|
||||
ticker.balance_sheet
|
||||
ticker.quarterly_balance_sheet
|
||||
ticker.cashflow
|
||||
ticker.quarterly_cashflow
|
||||
ticker.recommendations_summary
|
||||
ticker.analyst_price_target
|
||||
ticker.revenue_forecasts
|
||||
ticker.sustainability
|
||||
ticker.options
|
||||
ticker.news
|
||||
ticker.earnings_trend
|
||||
ticker.earnings_dates
|
||||
ticker.earnings_forecasts
|
||||
|
||||
dat.history(period="1wk")
|
||||
dat.history(start="2022-01-01")
|
||||
dat.history(start="2022-01-01", end="2022-03-01")
|
||||
ticker.history(period="1wk")
|
||||
ticker.history(start="2022-01-01")
|
||||
ticker.history(start="2022-01-01", end="2022-03-01")
|
||||
yf.download([tkr], period="1wk")
|
||||
|
||||
def test_session_pruning_goodTkr(self):
|
||||
tkr = "IBM"
|
||||
url = "https://finance.yahoo.com/quote/"+tkr
|
||||
ticker = yf.Ticker(tkr, session=self.session)
|
||||
|
||||
# All requests should succeed, so all urls should be in cache
|
||||
|
||||
yf.enable_prune_session_cache()
|
||||
|
||||
expected_urls = []
|
||||
|
||||
ticker.history(period="1wk")
|
||||
ticker.dividends
|
||||
ticker.splits
|
||||
ticker.actions
|
||||
expected_urls.append(f"https://query2.finance.yahoo.com/v8/finance/chart/{tkr}?range=1wk&interval=1d&includePrePost=False&events=div%2Csplits%2CcapitalGains")
|
||||
|
||||
ticker.info
|
||||
ticker.isin
|
||||
ticker.calendar
|
||||
ticker.recommendations
|
||||
ticker.recommendations_summary
|
||||
ticker.sustainability
|
||||
expected_urls.append(f"https://finance.yahoo.com/quote/{tkr}")
|
||||
|
||||
ticker.analyst_price_target
|
||||
ticker.revenue_forecasts
|
||||
ticker.earnings_trend
|
||||
ticker.earnings_forecasts
|
||||
expected_urls.append(f"https://finance.yahoo.com/quote/{tkr}/analysis")
|
||||
|
||||
ticker.major_holders
|
||||
ticker.institutional_holders
|
||||
ticker.mutualfund_holders
|
||||
expected_urls.append(f"https://finance.yahoo.com/quote/{tkr}/holders")
|
||||
|
||||
ticker.shares
|
||||
ticker.earnings
|
||||
ticker.quarterly_earnings
|
||||
expected_urls.append(f"https://finance.yahoo.com/quote/{tkr}/financials")
|
||||
|
||||
ticker.income_stmt
|
||||
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=annualTotalRevenue...")
|
||||
ticker.quarterly_income_stmt
|
||||
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=quarterlyTotalRevenue...")
|
||||
|
||||
ticker.balance_sheet
|
||||
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=annualTotalAssets...")
|
||||
ticker.quarterly_balance_sheet
|
||||
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=quarterlyTotalAssets...")
|
||||
|
||||
ticker.cashflow
|
||||
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=annualCashFlowsfromusedinOperatingActivitiesDirect...")
|
||||
ticker.quarterly_cashflow
|
||||
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=quarterlyCashFlowsfromusedinOperatingActivitiesDirect...")
|
||||
|
||||
ticker.options
|
||||
expected_urls.append(f"https://query2.finance.yahoo.com/v7/finance/options/{tkr}")
|
||||
|
||||
ticker.news
|
||||
expected_urls.append(f"https://query2.finance.yahoo.com/v1/finance/search?q={tkr}")
|
||||
|
||||
ticker.earnings_dates
|
||||
expected_urls.append(f"https://finance.yahoo.com/calendar/earnings?symbol={tkr}&offset=0&size=12")
|
||||
|
||||
for url in expected_urls:
|
||||
if url.endswith("..."):
|
||||
# This url is ridiculously long, so just search for a partial match
|
||||
url2 = url.replace("...", "")
|
||||
in_cache = False
|
||||
# for surl in self.session.cache.urls:
|
||||
for response in self.session.cache.filter():
|
||||
surl = response.url
|
||||
if surl.startswith(url2):
|
||||
in_cache = True
|
||||
break
|
||||
self.assertTrue(in_cache, "This url missing from requests_cache: "+url)
|
||||
else:
|
||||
self.assertTrue(self.session.cache.contains(url=url), "This url missing from requests_cache: "+url)
|
||||
|
||||
def test_session_pruning_badTkr(self):
|
||||
# Ideally would test a valid ticker after triggering Yahoo block, but
|
||||
# that's not good for me. As a proxy, use invalid ticker
|
||||
tkr = "XYZ-X"
|
||||
url = "https://finance.yahoo.com/quote/"+tkr
|
||||
ticker = yf.Ticker(tkr, session=self.session)
|
||||
|
||||
# All requests should fail, so none of these urls should be in cache
|
||||
|
||||
yf.enable_prune_session_cache()
|
||||
|
||||
expected_urls = []
|
||||
|
||||
ticker.history(period="1wk")
|
||||
ticker.dividends
|
||||
ticker.splits
|
||||
ticker.actions
|
||||
expected_urls.append(f"https://query2.finance.yahoo.com/v8/finance/chart/{tkr}?range=1wk&interval=1d&includePrePost=False&events=div%2Csplits%2CcapitalGains")
|
||||
|
||||
ticker.info
|
||||
ticker.isin
|
||||
ticker.calendar
|
||||
ticker.recommendations
|
||||
ticker.recommendations_summary
|
||||
ticker.sustainability
|
||||
expected_urls.append(f"https://finance.yahoo.com/quote/{tkr}")
|
||||
|
||||
ticker.analyst_price_target
|
||||
ticker.revenue_forecasts
|
||||
ticker.earnings_trend
|
||||
ticker.earnings_forecasts
|
||||
expected_urls.append(f"https://finance.yahoo.com/quote/{tkr}/analysis")
|
||||
|
||||
ticker.major_holders
|
||||
ticker.institutional_holders
|
||||
ticker.mutualfund_holders
|
||||
expected_urls.append(f"https://finance.yahoo.com/quote/{tkr}/holders")
|
||||
|
||||
ticker.shares
|
||||
ticker.earnings
|
||||
ticker.quarterly_earnings
|
||||
expected_urls.append(f"https://finance.yahoo.com/quote/{tkr}/financials")
|
||||
|
||||
ticker.income_stmt
|
||||
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=annualTotalRevenue...")
|
||||
ticker.quarterly_income_stmt
|
||||
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=quarterlyTotalRevenue...")
|
||||
|
||||
ticker.balance_sheet
|
||||
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=annualTotalAssets...")
|
||||
ticker.quarterly_balance_sheet
|
||||
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=quarterlyTotalAssets...")
|
||||
|
||||
ticker.cashflow
|
||||
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=annualCashFlowsfromusedinOperatingActivitiesDirect...")
|
||||
ticker.quarterly_cashflow
|
||||
expected_urls.append(f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type=quarterlyCashFlowsfromusedinOperatingActivitiesDirect...")
|
||||
|
||||
ticker.options
|
||||
expected_urls.append(f"https://query2.finance.yahoo.com/v7/finance/options/{tkr}")
|
||||
|
||||
# Skip news, don't care if in cache
|
||||
# ticker.news
|
||||
# expected_urls.append(f"https://query2.finance.yahoo.com/v1/finance/search?q={tkr}")
|
||||
|
||||
df = ticker.earnings_dates
|
||||
expected_urls.append(f"https://finance.yahoo.com/calendar/earnings?symbol={tkr}&offset=0&size=12")
|
||||
|
||||
for url in expected_urls:
|
||||
if url.endswith("..."):
|
||||
# This url is ridiculously long, so just search for a partial match
|
||||
url2 = url.replace("...", "")
|
||||
in_cache = False
|
||||
# for surl in self.session.cache.urls:
|
||||
for response in self.session.cache.filter():
|
||||
surl = response.url
|
||||
if surl.startswith(url2):
|
||||
in_cache = True
|
||||
break
|
||||
self.assertFalse(in_cache, "This url wrongly in requests_cache: "+url)
|
||||
else:
|
||||
self.assertFalse(self.session.cache.contains(url=url), "This url wrongly in requests_cache: "+url)
|
||||
|
||||
|
||||
class TestTickerHistory(unittest.TestCase):
|
||||
session = None
|
||||
@@ -678,39 +841,45 @@ class TestTickerInfo(unittest.TestCase):
|
||||
cls.session.close()
|
||||
|
||||
def setUp(self):
|
||||
tkrs = ["ESLT.TA", "BP.L", "GOOGL"]
|
||||
self.tickers = [yf.Ticker(tkr, session=self.session) for tkr in tkrs]
|
||||
self.symbols = []
|
||||
self.symbols += ["ESLT.TA", "BP.L", "GOOGL"]
|
||||
self.symbols.append("QCSTIX") # good for testing, doesn't trade
|
||||
self.symbols += ["BTC-USD", "IWO", "VFINX", "^GSPC"]
|
||||
self.symbols += ["SOKE.IS", "ADS.DE"] # detected bugs
|
||||
self.tickers = [yf.Ticker(s, session=self.session) for s in self.symbols]
|
||||
|
||||
def tearDown(self):
|
||||
self.ticker = None
|
||||
|
||||
def test_info(self):
|
||||
data = self.ticker.info
|
||||
data = self.tickers[0].info
|
||||
self.assertIsInstance(data, dict, "data has wrong type")
|
||||
self.assertIn("symbol", data.keys(), "Did not find expected key in info dict")
|
||||
self.assertEqual("GOOGL", data["symbol"], "Wrong symbol value in info dict")
|
||||
self.assertEqual(self.symbols[0], data["symbol"], "Wrong symbol value in info dict")
|
||||
|
||||
def test_basic_info(self):
|
||||
def test_fast_info(self):
|
||||
yf.scrapers.quote.PRUNE_INFO = False
|
||||
|
||||
# basic_info_keys = self.ticker.basic_info.keys()
|
||||
basic_info_keys = set()
|
||||
fast_info_keys = set()
|
||||
for ticker in self.tickers:
|
||||
basic_info_keys.update(set(ticker.basic_info.keys()))
|
||||
basic_info_keys = sorted(list(basic_info_keys))
|
||||
fast_info_keys.update(set(ticker.fast_info.keys()))
|
||||
fast_info_keys = sorted(list(fast_info_keys))
|
||||
|
||||
key_rename_map = {}
|
||||
key_rename_map["currency"] = "currency"
|
||||
key_rename_map["quote_type"] = "quoteType"
|
||||
key_rename_map["timezone"] = "exchangeTimezoneName"
|
||||
|
||||
key_rename_map["last_price"] = ["currentPrice", "regularMarketPrice"]
|
||||
key_rename_map["open"] = ["open", "regularMarketOpen"]
|
||||
key_rename_map["day_high"] = ["dayHigh", "regularMarketDayHigh"]
|
||||
key_rename_map["day_low"] = ["dayLow", "regularMarketDayLow"]
|
||||
key_rename_map["previous_close"] = ["previousClose", "regularMarketPreviousClose"]
|
||||
|
||||
# preMarketPrice
|
||||
key_rename_map["previous_close"] = ["previousClose"]
|
||||
key_rename_map["regular_market_previous_close"] = ["regularMarketPreviousClose"]
|
||||
|
||||
key_rename_map["fifty_day_average"] = "fiftyDayAverage"
|
||||
key_rename_map["two_hundred_day_average"] = "twoHundredDayAverage"
|
||||
key_rename_map["year_change"] = "52WeekChange"
|
||||
key_rename_map["year_change"] = ["52WeekChange", "fiftyTwoWeekChange"]
|
||||
key_rename_map["year_high"] = "fiftyTwoWeekHigh"
|
||||
key_rename_map["year_low"] = "fiftyTwoWeekLow"
|
||||
|
||||
@@ -719,25 +888,31 @@ class TestTickerInfo(unittest.TestCase):
|
||||
key_rename_map["three_month_average_volume"] = "averageVolume"
|
||||
|
||||
key_rename_map["market_cap"] = "marketCap"
|
||||
key_rename_map["shares"] = "floatShares"
|
||||
key_rename_map["timezone"] = "exchangeTimezoneName"
|
||||
key_rename_map["shares"] = "sharesOutstanding"
|
||||
|
||||
approximate_keys = {"fifty_day_average", "ten_day_average_volume"}
|
||||
approximate_keys.update({"market_cap"})
|
||||
for k in list(key_rename_map.keys()):
|
||||
if '_' in k:
|
||||
key_rename_map[yf.utils.snake_case_2_camelCase(k)] = key_rename_map[k]
|
||||
|
||||
# bad_keys = []
|
||||
# Note: share count items in info[] are bad. Sometimes the float > outstanding!
|
||||
# So often fast_info["shares"] does not match.
|
||||
# Why isn't fast_info["shares"] wrong? Because using it to calculate market cap is always correct.
|
||||
bad_keys = {"shares"}
|
||||
|
||||
# Loose tolerance for averages, no idea why they don't match info[]. Is info wrong?
|
||||
custom_tolerances = {}
|
||||
custom_tolerances["year_change"] = 1.0
|
||||
# custom_tolerances["ten_day_average_volume"] = 1e-3
|
||||
custom_tolerances["ten_day_average_volume"] = 1e-1
|
||||
# custom_tolerances["three_month_average_volume"] = 1e-2
|
||||
custom_tolerances["three_month_average_volume"] = 5e-1
|
||||
custom_tolerances["fifty_day_average"] = 1e-2
|
||||
custom_tolerances["two_hundred_day_average"] = 1e-2
|
||||
for k in list(custom_tolerances.keys()):
|
||||
if '_' in k:
|
||||
custom_tolerances[yf.utils.snake_case_2_camelCase(k)] = custom_tolerances[k]
|
||||
|
||||
for k in basic_info_keys:
|
||||
for k in fast_info_keys:
|
||||
if k in key_rename_map:
|
||||
k2 = key_rename_map[k]
|
||||
else:
|
||||
@@ -749,11 +924,10 @@ class TestTickerInfo(unittest.TestCase):
|
||||
for m in k2:
|
||||
for ticker in self.tickers:
|
||||
if not m in ticker.info:
|
||||
print(sorted(list(ticker.info.keys())))
|
||||
raise Exception("Need to add/fix mapping for basic_info key", k)
|
||||
# print(f"symbol={ticker.ticker}: fast_info key '{k}' mapped to info key '{m}' but not present in info")
|
||||
continue
|
||||
|
||||
if k in bad_keys:
|
||||
# Doesn't match, investigate why
|
||||
continue
|
||||
|
||||
if k in custom_tolerances:
|
||||
@@ -762,14 +936,25 @@ class TestTickerInfo(unittest.TestCase):
|
||||
rtol = 5e-3
|
||||
# rtol = 1e-4
|
||||
|
||||
print(f"Testing key {m} -> {k} ticker={ticker.ticker}")
|
||||
# if k in approximate_keys:
|
||||
v1 = ticker.basic_info[k]
|
||||
v2 = ticker.info[m]
|
||||
if isinstance(v1, float) or isinstance(v2, int):
|
||||
self.assertTrue(np.isclose(v1, v2, rtol=rtol), f"{k}: {v1} != {v2}")
|
||||
else:
|
||||
self.assertEqual(v1, v2, f"{k}: {v1} != {v2}")
|
||||
correct = ticker.info[m]
|
||||
test = ticker.fast_info[k]
|
||||
# print(f"Testing: symbol={ticker.ticker} m={m} k={k}: test={test} vs correct={correct}")
|
||||
if k in ["market_cap","marketCap"] and ticker.fast_info["currency"] in ["GBp", "ILA"]:
|
||||
# Adjust for currency to match Yahoo:
|
||||
test *= 0.01
|
||||
try:
|
||||
if correct is None:
|
||||
self.assertTrue(test is None or (not np.isnan(test)), f"{k}: {test} must be None or real value because correct={correct}")
|
||||
elif isinstance(test, float) or isinstance(correct, int):
|
||||
self.assertTrue(np.isclose(test, correct, rtol=rtol), f"{ticker.ticker} {k}: {test} != {correct}")
|
||||
else:
|
||||
self.assertEqual(test, correct, f"{k}: {test} != {correct}")
|
||||
except:
|
||||
if k in ["regularMarketPreviousClose"] and ticker.ticker in ["ADS.DE"]:
|
||||
# Yahoo is wrong, is returning post-market close not regular
|
||||
continue
|
||||
else:
|
||||
raise
|
||||
|
||||
|
||||
|
||||
@@ -780,6 +965,7 @@ def suite():
|
||||
suite.addTest(TestTickerHolders('Test holders'))
|
||||
suite.addTest(TestTickerHistory('Test Ticker history'))
|
||||
suite.addTest(TestTickerMiscFinancials('Test misc financials'))
|
||||
suite.addTest(TestTickerInfo('Test info & fast_info'))
|
||||
return suite
|
||||
|
||||
|
||||
|
||||
@@ -24,6 +24,7 @@ from .ticker import Ticker
|
||||
from .tickers import Tickers
|
||||
from .multi import download
|
||||
from .utils import set_tz_cache_location
|
||||
from .data import enable_prune_session_cache, disable_prune_session_cache
|
||||
|
||||
__version__ = version.version
|
||||
__author__ = "Ran Aroussi"
|
||||
@@ -44,3 +45,4 @@ def pdr_override():
|
||||
|
||||
|
||||
__all__ = ['download', 'Ticker', 'Tickers', 'pdr_override', 'set_tz_cache_location']
|
||||
__all__ += ['enable_prune_session_cache', 'disable_prune_session_cache']
|
||||
|
||||
539
yfinance/base.py
539
yfinance/base.py
@@ -23,6 +23,7 @@ from __future__ import print_function
|
||||
|
||||
import time as _time
|
||||
import datetime as _datetime
|
||||
import dateutil as _dateutil
|
||||
from typing import Optional
|
||||
|
||||
import pandas as _pd
|
||||
@@ -47,16 +48,19 @@ _SCRAPE_URL_ = 'https://finance.yahoo.com/quote'
|
||||
_ROOT_URL_ = 'https://finance.yahoo.com'
|
||||
|
||||
|
||||
class BasicInfo:
|
||||
class FastInfo:
|
||||
# Contains a small subset of info[] items that can be fetched faster elsewhere.
|
||||
# Imitates a dict.
|
||||
def __init__(self, tickerBaseObject):
|
||||
self._tkr = tickerBaseObject
|
||||
|
||||
self._prices_1y = None
|
||||
self._prices_1wk_1h_prepost = None
|
||||
self._prices_1wk_1h_reg = None
|
||||
self._md = None
|
||||
|
||||
self._currency = None
|
||||
self._quote_type = None
|
||||
self._exchange = None
|
||||
self._timezone = None
|
||||
|
||||
@@ -71,6 +75,8 @@ class BasicInfo:
|
||||
|
||||
self._prev_close = None
|
||||
|
||||
self._reg_prev_close = None
|
||||
|
||||
self._50d_day_average = None
|
||||
self._200d_day_average = None
|
||||
self._year_high = None
|
||||
@@ -80,17 +86,49 @@ class BasicInfo:
|
||||
self._10d_avg_vol = None
|
||||
self._3mo_avg_vol = None
|
||||
|
||||
# attrs = utils.attributes(self)
|
||||
# self.keys = attrs.keys()
|
||||
# utils.attributes is calling each method, bad! Have to hardcode
|
||||
_properties = ["currency", "quote_type", "exchange", "timezone"]
|
||||
_properties += ["shares", "market_cap"]
|
||||
_properties += ["last_price", "previous_close", "open", "day_high", "day_low"]
|
||||
_properties += ["regular_market_previous_close"]
|
||||
_properties += ["last_volume"]
|
||||
_properties += ["fifty_day_average", "two_hundred_day_average", "ten_day_average_volume", "three_month_average_volume"]
|
||||
_properties += ["year_high", "year_low", "year_change"]
|
||||
|
||||
# Because released before fixing key case, need to officially support
|
||||
# camel-case but also secretly support snake-case
|
||||
base_keys = [k for k in _properties if not '_' in k]
|
||||
|
||||
sc_keys = [k for k in _properties if '_' in k]
|
||||
|
||||
self._sc_to_cc_key = {k:utils.snake_case_2_camelCase(k) for k in sc_keys}
|
||||
self._cc_to_sc_key = {v:k for k,v in self._sc_to_cc_key.items()}
|
||||
|
||||
self._public_keys = sorted(base_keys + list(self._sc_to_cc_key.values()))
|
||||
self._keys = sorted(self._public_keys + sc_keys)
|
||||
|
||||
# dict imitation:
|
||||
def keys(self):
|
||||
attrs = utils.attributes(self)
|
||||
return attrs.keys()
|
||||
return self._public_keys
|
||||
def items(self):
|
||||
return [(k,self[k]) for k in self.keys()]
|
||||
return [(k,self[k]) for k in self._public_keys]
|
||||
def values(self):
|
||||
return [self[k] for k in self._public_keys]
|
||||
def get(self, key, default=None):
|
||||
if key in self.keys():
|
||||
if key in self._cc_to_sc_key:
|
||||
key = self._cc_to_sc_key[key]
|
||||
return self[key]
|
||||
return default
|
||||
def __getitem__(self, k):
|
||||
if not isinstance(k, str):
|
||||
raise KeyError(f"key must be a string")
|
||||
if not k in self.keys():
|
||||
raise KeyError(f"'{k}' not valid key. Examine 'BasicInfo.keys()'")
|
||||
if not k in self._keys:
|
||||
raise KeyError(f"'{k}' not valid key. Examine 'FastInfo.keys()'")
|
||||
if k in self._cc_to_sc_key:
|
||||
k = self._cc_to_sc_key[k]
|
||||
return getattr(self, k)
|
||||
def __contains__(self, k):
|
||||
return k in self.keys()
|
||||
@@ -102,9 +140,13 @@ class BasicInfo:
|
||||
def __repr__(self):
|
||||
return self.__str__()
|
||||
|
||||
def toJSON(self, indent=4):
|
||||
d = {k:self[k] for k in self.keys()}
|
||||
return _json.dumps({k:self[k] for k in self.keys()}, indent=indent)
|
||||
|
||||
def _get_1y_prices(self, fullDaysOnly=False):
|
||||
if self._prices_1y is None:
|
||||
self._prices_1y = self._tkr.history(period="380d", auto_adjust=False)
|
||||
self._prices_1y = self._tkr.history(period="380d", auto_adjust=False, debug=False, keepna=True)
|
||||
self._md = self._tkr.get_history_metadata()
|
||||
try:
|
||||
ctp = self._md["currentTradingPeriod"]
|
||||
@@ -118,14 +160,25 @@ class BasicInfo:
|
||||
raise
|
||||
|
||||
if self._prices_1y.empty:
|
||||
return self.self._prices_1y
|
||||
return self._prices_1y
|
||||
|
||||
dt1 = self._prices_1y.index[-1]
|
||||
dnow = pd.Timestamp.utcnow().tz_convert(self.timezone).date()
|
||||
d1 = dnow
|
||||
d0 = (d1 + _datetime.timedelta(days=1)) - utils._interval_to_timedelta("1y")
|
||||
if fullDaysOnly and self._exchange_open_now():
|
||||
# Exclude today
|
||||
dt1 -= utils._interval_to_timedelta("1h")
|
||||
dt0 = dt1 - utils._interval_to_timedelta("1y") + utils._interval_to_timedelta("1d")
|
||||
return self._prices_1y.loc[dt0:dt1]
|
||||
d1 -= utils._interval_to_timedelta("1d")
|
||||
return self._prices_1y.loc[str(d0):str(d1)]
|
||||
|
||||
def _get_1wk_1h_prepost_prices(self):
|
||||
if self._prices_1wk_1h_prepost is None:
|
||||
self._prices_1wk_1h_prepost = self._tkr.history(period="1wk", interval="1h", auto_adjust=False, prepost=True, debug=False)
|
||||
return self._prices_1wk_1h_prepost
|
||||
|
||||
def _get_1wk_1h_reg_prices(self):
|
||||
if self._prices_1wk_1h_reg is None:
|
||||
self._prices_1wk_1h_reg = self._tkr.history(period="1wk", interval="1h", auto_adjust=False, prepost=False, debug=False)
|
||||
return self._prices_1wk_1h_reg
|
||||
|
||||
def _get_exchange_metadata(self):
|
||||
if self._md is not None:
|
||||
@@ -169,8 +222,16 @@ class BasicInfo:
|
||||
self._currency = md["currency"]
|
||||
return self._currency
|
||||
|
||||
def _currency_is_cents(self):
|
||||
return self.currency in ["GBp", "ILA"]
|
||||
@property
|
||||
def quote_type(self):
|
||||
if self._quote_type is not None:
|
||||
return self._quote_type
|
||||
|
||||
if self._tkr._history_metadata is None:
|
||||
self._get_1y_prices()
|
||||
md = self._tkr.get_history_metadata()
|
||||
self._quote_type = md["instrumentType"]
|
||||
return self._quote_type
|
||||
|
||||
@property
|
||||
def exchange(self):
|
||||
@@ -197,36 +258,74 @@ class BasicInfo:
|
||||
if shares is None:
|
||||
# Requesting 18 months failed, so fallback to shares which should include last year
|
||||
shares = self._tkr.get_shares()
|
||||
if shares is None:
|
||||
raise Exception(f"{self._tkr.ticker}: Cannot retrieve share count for calculating market cap")
|
||||
if isinstance(shares, pd.DataFrame):
|
||||
shares = shares[shares.columns[0]]
|
||||
self._shares = shares.iloc[-1]
|
||||
if shares is not None:
|
||||
if isinstance(shares, pd.DataFrame):
|
||||
shares = shares[shares.columns[0]]
|
||||
self._shares = int(shares.iloc[-1])
|
||||
return self._shares
|
||||
|
||||
@property
|
||||
def last_price(self):
|
||||
if self._last_price is not None:
|
||||
return self._last_price
|
||||
# self._last_price = self._get_exchange_metadata()["regularMarketPrice"]
|
||||
prices = self._get_1y_prices()
|
||||
self._last_price = _np.nan if prices.empty else prices["Close"].iloc[-1]
|
||||
if prices.empty:
|
||||
self._last_price = self._get_exchange_metadata()["regularMarketPrice"]
|
||||
else:
|
||||
self._last_price = float(prices["Close"].iloc[-1])
|
||||
if _np.isnan(self._last_price):
|
||||
self._last_price = self._get_exchange_metadata()["regularMarketPrice"]
|
||||
return self._last_price
|
||||
|
||||
@property
|
||||
def previous_close(self):
|
||||
if self._prev_close is not None:
|
||||
return self._prev_close
|
||||
prices = self._get_1y_prices()
|
||||
self._prev_close = _np.nan if prices.empty else prices["Close"].iloc[-2]
|
||||
prices = self._get_1wk_1h_prepost_prices()
|
||||
prices = prices[["Close"]].groupby(prices.index.date).last()
|
||||
if prices.shape[0] < 2:
|
||||
# Very few symbols have previousClose despite no
|
||||
# trading data. E.g. 'QCSTIX'.
|
||||
# So fallback to original info[] if available.
|
||||
self._tkr.info # trigger fetch
|
||||
if "previousClose" in self._tkr._quote._retired_info:
|
||||
self._prev_close = self._tkr._quote._retired_info["previousClose"]
|
||||
else:
|
||||
self._prev_close = float(prices["Close"].iloc[-2])
|
||||
return self._prev_close
|
||||
|
||||
@property
|
||||
def regular_market_previous_close(self):
|
||||
if self._reg_prev_close is not None:
|
||||
return self._reg_prev_close
|
||||
prices = self._get_1y_prices()
|
||||
if prices.shape[0] == 1:
|
||||
# Tiny % of tickers don't return daily history before last trading day,
|
||||
# so backup option is hourly history:
|
||||
prices = self._get_1wk_1h_reg_prices()
|
||||
prices = prices[["Close"]].groupby(prices.index.date).last()
|
||||
if prices.shape[0] < 2:
|
||||
# Very few symbols have regularMarketPreviousClose despite no
|
||||
# trading data. E.g. 'QCSTIX'.
|
||||
# So fallback to original info[] if available.
|
||||
self._tkr.info # trigger fetch
|
||||
if "regularMarketPreviousClose" in self._tkr._quote._retired_info:
|
||||
self._reg_prev_close = self._tkr._quote._retired_info["regularMarketPreviousClose"]
|
||||
else:
|
||||
self._reg_prev_close = float(prices["Close"].iloc[-2])
|
||||
return self._reg_prev_close
|
||||
|
||||
@property
|
||||
def open(self):
|
||||
if self._open is not None:
|
||||
return self._open
|
||||
prices = self._get_1y_prices()
|
||||
self._open = _np.nan if prices.empty else prices["Open"].iloc[-1]
|
||||
if prices.empty:
|
||||
self._open = None
|
||||
else:
|
||||
self._open = float(prices["Open"].iloc[-1])
|
||||
if _np.isnan(self._open):
|
||||
self._open = None
|
||||
return self._open
|
||||
|
||||
@property
|
||||
@@ -234,7 +333,12 @@ class BasicInfo:
|
||||
if self._day_high is not None:
|
||||
return self._day_high
|
||||
prices = self._get_1y_prices()
|
||||
self._day_high = _np.nan if prices.empty else prices["High"].iloc[-1]
|
||||
if prices.empty:
|
||||
self._day_high = None
|
||||
else:
|
||||
self._day_high = float(prices["High"].iloc[-1])
|
||||
if _np.isnan(self._day_high):
|
||||
self._day_high = None
|
||||
return self._day_high
|
||||
|
||||
@property
|
||||
@@ -242,7 +346,12 @@ class BasicInfo:
|
||||
if self._day_low is not None:
|
||||
return self._day_low
|
||||
prices = self._get_1y_prices()
|
||||
self._day_low = _np.nan if prices.empty else prices["Low"].iloc[-1]
|
||||
if prices.empty:
|
||||
self._day_low = None
|
||||
else:
|
||||
self._day_low = float(prices["Low"].iloc[-1])
|
||||
if _np.isnan(self._day_low):
|
||||
self._day_low = None
|
||||
return self._day_low
|
||||
|
||||
@property
|
||||
@@ -250,7 +359,7 @@ class BasicInfo:
|
||||
if self._last_volume is not None:
|
||||
return self._last_volume
|
||||
prices = self._get_1y_prices()
|
||||
self._last_volume = 0 if prices.empty else prices["Volume"].iloc[-1]
|
||||
self._last_volume = None if prices.empty else int(prices["Volume"].iloc[-1])
|
||||
return self._last_volume
|
||||
|
||||
@property
|
||||
@@ -260,14 +369,14 @@ class BasicInfo:
|
||||
|
||||
prices = self._get_1y_prices(fullDaysOnly=True)
|
||||
if prices.empty:
|
||||
self._50d_day_average = _np.nan
|
||||
self._50d_day_average = None
|
||||
else:
|
||||
n = prices.shape[0]
|
||||
a = n-50
|
||||
b = n
|
||||
if a < 0:
|
||||
a = 0
|
||||
self._50d_day_average = prices["Close"].iloc[a:b].mean()
|
||||
self._50d_day_average = float(prices["Close"].iloc[a:b].mean())
|
||||
|
||||
return self._50d_day_average
|
||||
|
||||
@@ -278,7 +387,7 @@ class BasicInfo:
|
||||
|
||||
prices = self._get_1y_prices(fullDaysOnly=True)
|
||||
if prices.empty:
|
||||
self._200d_day_average = _np.nan
|
||||
self._200d_day_average = None
|
||||
else:
|
||||
n = prices.shape[0]
|
||||
a = n-200
|
||||
@@ -286,7 +395,7 @@ class BasicInfo:
|
||||
if a < 0:
|
||||
a = 0
|
||||
|
||||
self._200d_day_average = prices["Close"].iloc[a:b].mean()
|
||||
self._200d_day_average = float(prices["Close"].iloc[a:b].mean())
|
||||
|
||||
return self._200d_day_average
|
||||
|
||||
@@ -297,14 +406,14 @@ class BasicInfo:
|
||||
|
||||
prices = self._get_1y_prices(fullDaysOnly=True)
|
||||
if prices.empty:
|
||||
self._10d_avg_vol = 0
|
||||
self._10d_avg_vol = None
|
||||
else:
|
||||
n = prices.shape[0]
|
||||
a = n-10
|
||||
b = n
|
||||
if a < 0:
|
||||
a = 0
|
||||
self._10d_avg_vol = prices["Volume"].iloc[a:b].mean()
|
||||
self._10d_avg_vol = int(prices["Volume"].iloc[a:b].mean())
|
||||
|
||||
return self._10d_avg_vol
|
||||
|
||||
@@ -315,11 +424,11 @@ class BasicInfo:
|
||||
|
||||
prices = self._get_1y_prices(fullDaysOnly=True)
|
||||
if prices.empty:
|
||||
self._3mo_avg_vol = 0
|
||||
self._3mo_avg_vol = None
|
||||
else:
|
||||
dt1 = prices.index[-1]
|
||||
dt0 = dt1 - utils._interval_to_timedelta("3mo") + utils._interval_to_timedelta("1d")
|
||||
self._3mo_avg_vol = prices.loc[dt0:dt1, "Volume"].mean()
|
||||
self._3mo_avg_vol = int(prices.loc[dt0:dt1, "Volume"].mean())
|
||||
|
||||
return self._3mo_avg_vol
|
||||
|
||||
@@ -329,7 +438,9 @@ class BasicInfo:
|
||||
return self._year_high
|
||||
|
||||
prices = self._get_1y_prices(fullDaysOnly=True)
|
||||
self._year_high = prices["High"].max()
|
||||
if prices.empty:
|
||||
prices = self._get_1y_prices(fullDaysOnly=False)
|
||||
self._year_high = float(prices["High"].max())
|
||||
return self._year_high
|
||||
|
||||
@property
|
||||
@@ -338,7 +449,9 @@ class BasicInfo:
|
||||
return self._year_low
|
||||
|
||||
prices = self._get_1y_prices(fullDaysOnly=True)
|
||||
self._year_low = prices["Low"].min()
|
||||
if prices.empty:
|
||||
prices = self._get_1y_prices(fullDaysOnly=False)
|
||||
self._year_low = float(prices["Low"].min())
|
||||
return self._year_low
|
||||
|
||||
@property
|
||||
@@ -347,7 +460,9 @@ class BasicInfo:
|
||||
return self._year_change
|
||||
|
||||
prices = self._get_1y_prices(fullDaysOnly=True)
|
||||
self._year_change = (prices["Close"].iloc[-1] - prices["Close"].iloc[0]) / prices["Close"].iloc[0]
|
||||
if prices.shape[0] >= 2:
|
||||
self._year_change = (prices["Close"].iloc[-1] - prices["Close"].iloc[0]) / prices["Close"].iloc[0]
|
||||
self._year_change = float(self._year_change)
|
||||
return self._year_change
|
||||
|
||||
@property
|
||||
@@ -355,9 +470,23 @@ class BasicInfo:
|
||||
if self._mcap is not None:
|
||||
return self._mcap
|
||||
|
||||
self._mcap = self.shares * self.last_price
|
||||
if self._currency_is_cents():
|
||||
self._mcap *= 0.01
|
||||
try:
|
||||
shares = self.shares
|
||||
except Exception as e:
|
||||
if "Cannot retrieve share count" in str(e):
|
||||
shares = None
|
||||
else:
|
||||
raise
|
||||
|
||||
if shares is None:
|
||||
# Very few symbols have marketCap despite no share count.
|
||||
# E.g. 'BTC-USD'
|
||||
# So fallback to original info[] if available.
|
||||
self._tkr.info
|
||||
if "marketCap" in self._tkr._quote._retired_info:
|
||||
self._mcap = self._tkr._quote._retired_info["marketCap"]
|
||||
else:
|
||||
self._mcap = float(shares * self.last_price)
|
||||
return self._mcap
|
||||
|
||||
|
||||
@@ -391,7 +520,7 @@ class TickerBase:
|
||||
self._quote = Quote(self._data)
|
||||
self._fundamentals = Fundamentals(self._data)
|
||||
|
||||
self._basic_info = BasicInfo(self)
|
||||
self._fast_info = FastInfo(self)
|
||||
|
||||
def stats(self, proxy=None):
|
||||
ticker_url = "{}/{}".format(self._scrape_url, self.ticker)
|
||||
@@ -426,8 +555,9 @@ class TickerBase:
|
||||
Adjust all OHLC automatically? Default is True
|
||||
back_adjust: bool
|
||||
Back-adjusted data to mimic true historical prices
|
||||
repair: bool
|
||||
Detect currency unit 100x mixups and attempt repair
|
||||
repair: bool or "silent"
|
||||
Detect currency unit 100x mixups and attempt repair.
|
||||
If True, fix & print summary. If "silent", just fix.
|
||||
Default is False
|
||||
keepna: bool
|
||||
Keep NaN rows returned by Yahoo?
|
||||
@@ -465,7 +595,8 @@ class TickerBase:
|
||||
return utils.empty_df()
|
||||
|
||||
if end is None:
|
||||
end = int(_time.time())
|
||||
midnight = pd.Timestamp.utcnow().tz_convert(tz).ceil("D")
|
||||
end = int(midnight.timestamp())
|
||||
else:
|
||||
end = utils._parse_user_dt(end, tz)
|
||||
if start is None:
|
||||
@@ -521,7 +652,10 @@ class TickerBase:
|
||||
"Our engineers are working quickly to resolve "
|
||||
"the issue. Thank you for your patience.")
|
||||
|
||||
data = data.json()
|
||||
if "yf_json" in dir(data):
|
||||
data = data.yf_json
|
||||
else:
|
||||
data = data.json()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -530,6 +664,7 @@ class TickerBase:
|
||||
self._history_metadata = data["chart"]["result"][0]["meta"]
|
||||
except Exception:
|
||||
self._history_metadata = {}
|
||||
self._history_metadata = utils.format_history_metadata(self._history_metadata)
|
||||
|
||||
err_msg = "No data found for this date range, symbol may be delisted"
|
||||
fail = False
|
||||
@@ -606,6 +741,9 @@ class TickerBase:
|
||||
quotes = utils.set_df_tz(quotes, params["interval"], tz_exchange)
|
||||
quotes = utils.fix_Yahoo_dst_issue(quotes, params["interval"])
|
||||
quotes = utils.fix_Yahoo_returning_live_separate(quotes, params["interval"], tz_exchange)
|
||||
intraday = params["interval"][-1] in ("m", 'h')
|
||||
if not prepost and intraday and "tradingPeriods" in self._history_metadata:
|
||||
quotes = utils.fix_Yahoo_returning_prepost_unrequested(quotes, params["interval"], self._history_metadata)
|
||||
|
||||
# actions
|
||||
dividends, splits, capital_gains = utils.parse_actions(data["chart"]["result"][0])
|
||||
@@ -670,10 +808,10 @@ class TickerBase:
|
||||
else:
|
||||
df["Capital Gains"] = 0.0
|
||||
|
||||
if repair:
|
||||
if repair==True or repair=="silent":
|
||||
# Do this before auto/back adjust
|
||||
df = self._fix_zeroes(df, interval, tz_exchange)
|
||||
df = self._fix_unit_mixups(df, interval, tz_exchange)
|
||||
df = self._fix_zeroes(df, interval, tz_exchange, prepost, silent=(repair=="silent"))
|
||||
df = self._fix_unit_mixups(df, interval, tz_exchange, prepost, silent=(repair=="silent"))
|
||||
|
||||
# Auto/back adjust
|
||||
try:
|
||||
@@ -717,31 +855,40 @@ class TickerBase:
|
||||
|
||||
# ------------------------
|
||||
|
||||
def _reconstruct_intervals_batch(self, df, interval, tag=-1):
|
||||
def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1, silent=False):
|
||||
if not isinstance(df, _pd.DataFrame):
|
||||
raise Exception("'df' must be a Pandas DataFrame not", type(df))
|
||||
if interval == "1m":
|
||||
# Can't go smaller than 1m so can't reconstruct
|
||||
return df
|
||||
|
||||
# Reconstruct values in df using finer-grained price data. Delimiter marks what to reconstruct
|
||||
|
||||
debug = False
|
||||
# debug = True
|
||||
|
||||
if interval[1:] in ['d', 'wk', 'mo']:
|
||||
# Interday data always includes pre & post
|
||||
prepost = True
|
||||
intraday = False
|
||||
else:
|
||||
intraday = True
|
||||
|
||||
price_cols = [c for c in ["Open", "High", "Low", "Close", "Adj Close"] if c in df]
|
||||
data_cols = price_cols + ["Volume"]
|
||||
|
||||
# If interval is weekly then can construct with daily. But if smaller intervals then
|
||||
# restricted to recent times:
|
||||
# - daily = hourly restricted to last 730 days
|
||||
sub_interval = None
|
||||
td_range = None
|
||||
if interval == "1wk":
|
||||
# Correct by fetching week of daily data
|
||||
sub_interval = "1d"
|
||||
td_range = _datetime.timedelta(days=7)
|
||||
elif interval == "1d":
|
||||
# Correct by fetching day of hourly data
|
||||
sub_interval = "1h"
|
||||
td_range = _datetime.timedelta(days=1)
|
||||
elif interval == "1h":
|
||||
sub_interval = "30m"
|
||||
td_range = _datetime.timedelta(hours=1)
|
||||
intervals = ["1wk", "1d", "1h", "30m", "15m", "5m", "2m", "1m"]
|
||||
itds = {i:utils._interval_to_timedelta(interval) for i in intervals}
|
||||
nexts = {intervals[i]:intervals[i+1] for i in range(len(intervals)-1)}
|
||||
min_lookbacks = {"1wk":None, "1d":None, "1h":_datetime.timedelta(days=730)}
|
||||
for i in ["30m", "15m", "5m", "2m"]:
|
||||
min_lookbacks[i] = _datetime.timedelta(days=60)
|
||||
min_lookbacks["1m"] = _datetime.timedelta(days=30)
|
||||
if interval in nexts:
|
||||
sub_interval = nexts[interval]
|
||||
td_range = itds[interval]
|
||||
else:
|
||||
print("WARNING: Have not implemented repair for '{}' interval. Contact developers".format(interval))
|
||||
raise Exception("why here")
|
||||
@@ -753,76 +900,107 @@ class TickerBase:
|
||||
f_repair_rows = f_repair.any(axis=1)
|
||||
|
||||
# Ignore old intervals for which Yahoo won't return finer data:
|
||||
if sub_interval == "1h":
|
||||
f_recent = _datetime.date.today() - df.index.date < _datetime.timedelta(days=730)
|
||||
m = min_lookbacks[sub_interval]
|
||||
if m is None:
|
||||
min_dt = None
|
||||
else:
|
||||
m -= _datetime.timedelta(days=1) # allow space for 1-day padding
|
||||
min_dt = _pd.Timestamp.utcnow() - m
|
||||
min_dt = min_dt.tz_convert(df.index.tz).ceil("D")
|
||||
if debug:
|
||||
print(f"- min_dt={min_dt} interval={interval} sub_interval={sub_interval}")
|
||||
if min_dt is not None:
|
||||
f_recent = df.index >= min_dt
|
||||
f_repair_rows = f_repair_rows & f_recent
|
||||
elif sub_interval in ["30m", "15m"]:
|
||||
f_recent = _datetime.date.today() - df.index.date < _datetime.timedelta(days=60)
|
||||
f_repair_rows = f_repair_rows & f_recent
|
||||
if not f_repair_rows.any():
|
||||
print("data too old to fix")
|
||||
return df
|
||||
if not f_repair_rows.any():
|
||||
if debug:
|
||||
print("data too old to repair")
|
||||
return df
|
||||
|
||||
dts_to_repair = df.index[f_repair_rows]
|
||||
indices_to_repair = _np.where(f_repair_rows)[0]
|
||||
|
||||
if len(dts_to_repair) == 0:
|
||||
if debug:
|
||||
print("dts_to_repair[] is empty")
|
||||
return df
|
||||
|
||||
df_v2 = df.copy()
|
||||
df_noNa = df[~df[price_cols].isna().any(axis=1)]
|
||||
f_good = ~(df[price_cols].isna().any(axis=1))
|
||||
f_good = f_good & (df[price_cols].to_numpy()!=tag).all(axis=1)
|
||||
df_good = df[f_good]
|
||||
|
||||
# Group nearby NaN-intervals together to reduce number of Yahoo fetches
|
||||
dts_groups = [[dts_to_repair[0]]]
|
||||
last_dt = dts_to_repair[0]
|
||||
last_ind = indices_to_repair[0]
|
||||
td = utils._interval_to_timedelta(interval)
|
||||
if interval == "1mo":
|
||||
grp_td_threshold = _datetime.timedelta(days=28)
|
||||
elif interval == "1wk":
|
||||
grp_td_threshold = _datetime.timedelta(days=28)
|
||||
elif interval == "1d":
|
||||
grp_td_threshold = _datetime.timedelta(days=14)
|
||||
elif interval == "1h":
|
||||
grp_td_threshold = _datetime.timedelta(days=7)
|
||||
# Note on setting max size: have to allow space for adding good data
|
||||
if sub_interval == "1mo":
|
||||
grp_max_size = _dateutil.relativedelta.relativedelta(years=2)
|
||||
elif sub_interval == "1wk":
|
||||
grp_max_size = _dateutil.relativedelta.relativedelta(years=2)
|
||||
elif sub_interval == "1d":
|
||||
grp_max_size = _dateutil.relativedelta.relativedelta(years=2)
|
||||
elif sub_interval == "1h":
|
||||
grp_max_size = _dateutil.relativedelta.relativedelta(years=1)
|
||||
elif sub_interval == "1m":
|
||||
grp_max_size = _datetime.timedelta(days=5) # allow 2 days for buffer below
|
||||
else:
|
||||
grp_td_threshold = _datetime.timedelta(days=2)
|
||||
# grp_td_threshold = _datetime.timedelta(days=7)
|
||||
grp_max_size = _datetime.timedelta(days=30)
|
||||
if debug:
|
||||
print("- grp_max_size =", grp_max_size)
|
||||
for i in range(1, len(dts_to_repair)):
|
||||
ind = indices_to_repair[i]
|
||||
dt = dts_to_repair[i]
|
||||
if (dt-dts_groups[-1][-1]) < grp_td_threshold:
|
||||
dts_groups[-1].append(dt)
|
||||
elif ind - last_ind <= 3:
|
||||
if dt.date() < dts_groups[-1][0].date()+grp_max_size:
|
||||
dts_groups[-1].append(dt)
|
||||
else:
|
||||
dts_groups.append([dt])
|
||||
last_dt = dt
|
||||
last_ind = ind
|
||||
|
||||
if debug:
|
||||
print("Repair groups:")
|
||||
for g in dts_groups:
|
||||
print(f"- {g[0]} -> {g[-1]}")
|
||||
|
||||
# Add some good data to each group, so can calibrate later:
|
||||
for i in range(len(dts_groups)):
|
||||
g = dts_groups[i]
|
||||
g0 = g[0]
|
||||
i0 = df_noNa.index.get_loc(g0)
|
||||
i0 = df_good.index.get_indexer([g0], method="nearest")[0]
|
||||
if i0 > 0:
|
||||
dts_groups[i].insert(0, df_noNa.index[i0-1])
|
||||
if (min_dt is None or df_good.index[i0-1] >= min_dt) and \
|
||||
((not intraday) or df_good.index[i0-1].date()==g0.date()):
|
||||
i0 -= 1
|
||||
gl = g[-1]
|
||||
il = df_noNa.index.get_loc(gl)
|
||||
if il < len(df_noNa)-1:
|
||||
dts_groups[i].append(df_noNa.index[il+1])
|
||||
il = df_good.index.get_indexer([gl], method="nearest")[0]
|
||||
if il < len(df_good)-1:
|
||||
if (not intraday) or df_good.index[il+1].date()==gl.date():
|
||||
il += 1
|
||||
good_dts = df_good.index[i0:il+1]
|
||||
dts_groups[i] += good_dts.to_list()
|
||||
dts_groups[i].sort()
|
||||
|
||||
n_fixed = 0
|
||||
for g in dts_groups:
|
||||
df_block = df[df.index.isin(g)]
|
||||
if debug:
|
||||
print("- df_block:")
|
||||
print(df_block)
|
||||
|
||||
start_dt = g[0]
|
||||
start_d = start_dt.date()
|
||||
if sub_interval == "1h" and (_datetime.date.today() - start_d) > _datetime.timedelta(days=729):
|
||||
# Don't bother requesting more price data, Yahoo will reject
|
||||
if debug:
|
||||
print(f"- Don't bother requesting {sub_interval} price data, Yahoo will reject")
|
||||
continue
|
||||
elif sub_interval in ["30m", "15m"] and (_datetime.date.today() - start_d) > _datetime.timedelta(days=59):
|
||||
# Don't bother requesting more price data, Yahoo will reject
|
||||
if debug:
|
||||
print(f"- Don't bother requesting {sub_interval} price data, Yahoo will reject")
|
||||
continue
|
||||
|
||||
td_1d = _datetime.timedelta(days=1)
|
||||
@@ -836,15 +1014,25 @@ class TickerBase:
|
||||
fetch_start = g[0]
|
||||
fetch_end = g[-1] + td_range
|
||||
|
||||
prepost = interval == "1d"
|
||||
df_fine = self.history(start=fetch_start, end=fetch_end, interval=sub_interval, auto_adjust=False, prepost=prepost, repair=False, keepna=True)
|
||||
# The first and last day returned by Yahoo can be slightly wrong, so add buffer:
|
||||
fetch_start -= td_1d
|
||||
fetch_end += td_1d
|
||||
if intraday:
|
||||
fetch_start = fetch_start.date()
|
||||
fetch_end = fetch_end.date()+td_1d
|
||||
if debug:
|
||||
print(f"- fetching {sub_interval} prepost={prepost} {fetch_start}->{fetch_end}")
|
||||
r = "silent" if silent else True
|
||||
df_fine = self.history(start=fetch_start, end=fetch_end, interval=sub_interval, auto_adjust=False, actions=False, prepost=prepost, repair=r, keepna=True)
|
||||
if df_fine is None or df_fine.empty:
|
||||
print("YF: WARNING: Cannot reconstruct because Yahoo not returning data in interval")
|
||||
if not silent:
|
||||
print("YF: WARNING: Cannot reconstruct because Yahoo not returning data in interval")
|
||||
continue
|
||||
# Discard the buffer
|
||||
df_fine = df_fine.loc[g[0] : g[-1]+itds[sub_interval]-_datetime.timedelta(milliseconds=1)]
|
||||
|
||||
df_fine["ctr"] = 0
|
||||
if interval == "1wk":
|
||||
# df_fine["Week Start"] = df_fine.index.tz_localize(None).to_period("W-SUN").start_time
|
||||
weekdays = ["MON", "TUE", "WED", "THU", "FRI", "SAT", "SUN"]
|
||||
week_end_day = weekdays[(df_block.index[0].weekday()+7-1)%7]
|
||||
df_fine["Week Start"] = df_fine.index.tz_localize(None).to_period("W-"+week_end_day).start_time
|
||||
@@ -873,31 +1061,36 @@ class TickerBase:
|
||||
new_index = _np.append([df_fine.index[0]], df_fine.index[df_fine["intervalID"].diff()>0])
|
||||
df_new.index = new_index
|
||||
|
||||
if debug:
|
||||
print("- df_new:")
|
||||
print(df_new)
|
||||
|
||||
# Calibrate! Check whether 'df_fine' has different split-adjustment.
|
||||
# If different, then adjust to match 'df'
|
||||
df_block_calib = df_block[price_cols]
|
||||
common_index = df_block_calib.index[df_block_calib.index.isin(df_new.index)]
|
||||
common_index = _np.intersect1d(df_block.index, df_new.index)
|
||||
if len(common_index) == 0:
|
||||
# Can't calibrate so don't attempt repair
|
||||
if debug:
|
||||
print("Can't calibrate so don't attempt repair")
|
||||
continue
|
||||
df_new_calib = df_new[df_new.index.isin(common_index)][price_cols]
|
||||
df_block_calib = df_block_calib[df_block_calib.index.isin(common_index)]
|
||||
calib_filter = (df_block_calib != tag).to_numpy()
|
||||
df_new_calib = df_new[df_new.index.isin(common_index)][price_cols].to_numpy()
|
||||
df_block_calib = df_block[df_block.index.isin(common_index)][price_cols].to_numpy()
|
||||
calib_filter = (df_block_calib != tag)
|
||||
if not calib_filter.any():
|
||||
# Can't calibrate so don't attempt repair
|
||||
if debug:
|
||||
print("Can't calibrate so don't attempt repair")
|
||||
continue
|
||||
# Avoid divide-by-zero warnings printing:
|
||||
df_new_calib = df_new_calib.to_numpy()
|
||||
df_block_calib = df_block_calib.to_numpy()
|
||||
# Avoid divide-by-zero warnings:
|
||||
for j in range(len(price_cols)):
|
||||
c = price_cols[j]
|
||||
f = ~calib_filter[:,j]
|
||||
if f.any():
|
||||
df_block_calib[f,j] = 1
|
||||
df_new_calib[f,j] = 1
|
||||
ratios = (df_block_calib / df_new_calib)[calib_filter]
|
||||
ratios = df_block_calib[calib_filter] / df_new_calib[calib_filter]
|
||||
ratio = _np.mean(ratios)
|
||||
#
|
||||
if debug:
|
||||
print(f"- price calibration ratio (raw) = {ratio}")
|
||||
ratio_rcp = round(1.0 / ratio, 1)
|
||||
ratio = round(ratio, 1)
|
||||
if ratio == 1 and ratio_rcp == 1:
|
||||
@@ -916,13 +1109,22 @@ class TickerBase:
|
||||
df_new["Volume"] *= ratio_rcp
|
||||
|
||||
# Repair!
|
||||
bad_dts = df_block.index[(df_block[price_cols]==tag).any(axis=1)]
|
||||
bad_dts = df_block.index[(df_block[price_cols+["Volume"]]==tag).any(axis=1)]
|
||||
|
||||
if debug:
|
||||
no_fine_data_dts = []
|
||||
for idx in bad_dts:
|
||||
if not idx in df_new.index:
|
||||
# Yahoo didn't return finer-grain data for this interval,
|
||||
# so probably no trading happened.
|
||||
no_fine_data_dts.append(idx)
|
||||
if len(no_fine_data_dts) > 0:
|
||||
print(f"Yahoo didn't return finer-grain data for these intervals:")
|
||||
print(no_fine_data_dts)
|
||||
for idx in bad_dts:
|
||||
if not idx in df_new.index:
|
||||
# Yahoo didn't return finer-grain data for this interval,
|
||||
# so probably no trading happened.
|
||||
# print("no fine data")
|
||||
continue
|
||||
df_new_row = df_new.loc[idx]
|
||||
|
||||
@@ -951,9 +1153,12 @@ class TickerBase:
|
||||
df_v2.loc[idx, "Volume"] = df_new_row["Volume"]
|
||||
n_fixed += 1
|
||||
|
||||
if debug:
|
||||
print("df_v2:") ; print(df_v2)
|
||||
|
||||
return df_v2
|
||||
|
||||
def _fix_unit_mixups(self, df, interval, tz_exchange):
|
||||
def _fix_unit_mixups(self, df, interval, tz_exchange, prepost, silent=False):
|
||||
# Sometimes Yahoo returns few prices in cents/pence instead of $/£
|
||||
# I.e. 100x bigger
|
||||
# Easy to detect and fix, just look for outliers = ~100x local median
|
||||
@@ -975,7 +1180,7 @@ class TickerBase:
|
||||
# adding it to dependencies.
|
||||
from scipy import ndimage as _ndimage
|
||||
|
||||
data_cols = ["High", "Open", "Low", "Close"] # Order important, separate High from Low
|
||||
data_cols = ["High", "Open", "Low", "Close", "Adj Close"] # Order important, separate High from Low
|
||||
data_cols = [c for c in data_cols if c in df2.columns]
|
||||
f_zeroes = (df2[data_cols]==0).any(axis=1)
|
||||
if f_zeroes.any():
|
||||
@@ -1000,7 +1205,7 @@ class TickerBase:
|
||||
df2.loc[fi, c] = tag
|
||||
|
||||
n_before = (df2[data_cols].to_numpy()==tag).sum()
|
||||
df2 = self._reconstruct_intervals_batch(df2, interval, tag=tag)
|
||||
df2 = self._reconstruct_intervals_batch(df2, interval, prepost, tag, silent)
|
||||
n_after = (df2[data_cols].to_numpy()==tag).sum()
|
||||
|
||||
if n_after > 0:
|
||||
@@ -1023,6 +1228,11 @@ class TickerBase:
|
||||
if fi[j]:
|
||||
df2.loc[idx, c] = df.loc[idx, c] * 0.01
|
||||
#
|
||||
c = "Adj Close"
|
||||
j = data_cols.index(c)
|
||||
if fi[j]:
|
||||
df2.loc[idx, c] = df.loc[idx, c] * 0.01
|
||||
#
|
||||
c = "High"
|
||||
j = data_cols.index(c)
|
||||
if fi[j]:
|
||||
@@ -1037,7 +1247,7 @@ class TickerBase:
|
||||
|
||||
n_fixed = n_before - n_after_crude
|
||||
n_fixed_crudely = n_after - n_after_crude
|
||||
if n_fixed > 0:
|
||||
if not silent and n_fixed > 0:
|
||||
report_msg = f"{self.ticker}: fixed {n_fixed}/{n_before} currency unit mixups "
|
||||
if n_fixed_crudely > 0:
|
||||
report_msg += f"({n_fixed_crudely} crudely) "
|
||||
@@ -1057,7 +1267,7 @@ class TickerBase:
|
||||
|
||||
return df2
|
||||
|
||||
def _fix_zeroes(self, df, interval, tz_exchange):
|
||||
def _fix_zeroes(self, df, interval, tz_exchange, prepost, silent=False):
|
||||
# Sometimes Yahoo returns prices=0 or NaN when trades occurred.
|
||||
# But most times when prices=0 or NaN returned is because no trades.
|
||||
# Impossible to distinguish, so only attempt repair if few or rare.
|
||||
@@ -1065,6 +1275,12 @@ class TickerBase:
|
||||
if df.shape[0] == 0:
|
||||
return df
|
||||
|
||||
debug = False
|
||||
# debug = True
|
||||
|
||||
intraday = interval[-1] in ("m", 'h')
|
||||
|
||||
df = df.sort_index() # important!
|
||||
df2 = df.copy()
|
||||
|
||||
if df2.index.tz is None:
|
||||
@@ -1073,16 +1289,34 @@ class TickerBase:
|
||||
df2.index = df2.index.tz_convert(tz_exchange)
|
||||
|
||||
price_cols = [c for c in ["Open", "High", "Low", "Close", "Adj Close"] if c in df2.columns]
|
||||
f_zero_or_nan = (df2[price_cols] == 0.0).values | df2[price_cols].isna().values
|
||||
f_prices_bad = (df2[price_cols] == 0.0) | df2[price_cols].isna()
|
||||
df2_reserve = None
|
||||
if intraday:
|
||||
# Ignore days with >50% intervals containing NaNs
|
||||
df_nans = pd.DataFrame(f_prices_bad.any(axis=1), columns=["nan"])
|
||||
df_nans["_date"] = df_nans.index.date
|
||||
grp = df_nans.groupby("_date")
|
||||
nan_pct = grp.sum() / grp.count()
|
||||
dts = nan_pct.index[nan_pct["nan"]>0.5]
|
||||
f_zero_or_nan_ignore = _np.isin(f_prices_bad.index.date, dts)
|
||||
df2_reserve = df2[f_zero_or_nan_ignore]
|
||||
df2 = df2[~f_zero_or_nan_ignore]
|
||||
f_prices_bad = (df2[price_cols] == 0.0) | df2[price_cols].isna()
|
||||
|
||||
f_high_low_good = (~df2["High"].isna()) & (~df2["Low"].isna())
|
||||
f_vol_bad = (df2["Volume"]==0).to_numpy() & f_high_low_good & (df2["High"]!=df2["Low"]).to_numpy()
|
||||
|
||||
# Check whether worth attempting repair
|
||||
if f_zero_or_nan.any(axis=1).sum() == 0:
|
||||
f_prices_bad = f_prices_bad.to_numpy()
|
||||
f_bad_rows = f_prices_bad.any(axis=1) | f_vol_bad
|
||||
if not f_bad_rows.any():
|
||||
if debug:
|
||||
print("no bad data to repair")
|
||||
return df
|
||||
if f_zero_or_nan.sum() == len(price_cols)*len(df2):
|
||||
if f_prices_bad.sum() == len(price_cols)*len(df2):
|
||||
# Need some good data to calibrate
|
||||
return df
|
||||
# - avoid repair if many zeroes/NaNs
|
||||
pct_zero_or_nan = f_zero_or_nan.sum() / (len(price_cols)*len(df2))
|
||||
if f_zero_or_nan.any(axis=1).sum()>2 and pct_zero_or_nan > 0.05:
|
||||
if debug:
|
||||
print("no good data to calibrate")
|
||||
return df
|
||||
|
||||
data_cols = price_cols + ["Volume"]
|
||||
@@ -1091,17 +1325,31 @@ class TickerBase:
|
||||
tag = -1.0
|
||||
for i in range(len(price_cols)):
|
||||
c = price_cols[i]
|
||||
df2.loc[f_zero_or_nan[:,i], c] = tag
|
||||
df2.loc[f_prices_bad[:,i], c] = tag
|
||||
df2.loc[f_vol_bad, "Volume"] = tag
|
||||
# If volume=0 or NaN for bad prices, then tag volume for repair
|
||||
df2.loc[f_zero_or_nan.any(axis=1) & (df2["Volume"]==0), "Volume"] = tag
|
||||
df2.loc[f_zero_or_nan.any(axis=1) & (df2["Volume"].isna()), "Volume"] = tag
|
||||
f_vol_zero_or_nan = (df2["Volume"].to_numpy()==0) | (df2["Volume"].isna().to_numpy())
|
||||
df2.loc[f_prices_bad.any(axis=1) & f_vol_zero_or_nan, "Volume"] = tag
|
||||
# If volume=0 or NaN but price moved in interval, then tag volume for repair
|
||||
f_change = df2["High"].to_numpy() != df2["Low"].to_numpy()
|
||||
df2.loc[f_change & f_vol_zero_or_nan, "Volume"] = tag
|
||||
|
||||
n_before = (df2[data_cols].to_numpy()==tag).sum()
|
||||
df2 = self._reconstruct_intervals_batch(df2, interval, tag=tag)
|
||||
dts_tagged = df2.index[(df2[data_cols].to_numpy()==tag).any(axis=1)]
|
||||
df2 = self._reconstruct_intervals_batch(df2, interval, prepost, tag, silent)
|
||||
n_after = (df2[data_cols].to_numpy()==tag).sum()
|
||||
dts_not_repaired = df2.index[(df2[data_cols].to_numpy()==tag).any(axis=1)]
|
||||
n_fixed = n_before - n_after
|
||||
if n_fixed > 0:
|
||||
print("{}: fixed {} price=0.0 errors in {} price data".format(self.ticker, n_fixed, interval))
|
||||
if not silent and n_fixed > 0:
|
||||
msg = f"{self.ticker}: fixed {n_fixed}/{n_before} value=0 errors in {interval} price data"
|
||||
if n_fixed < 4:
|
||||
dts_repaired = sorted(list(set(dts_tagged).difference(dts_not_repaired)))
|
||||
msg += f": {dts_repaired}"
|
||||
print(msg)
|
||||
|
||||
if df2_reserve is not None:
|
||||
df2 = _pd.concat([df2, df2_reserve])
|
||||
df2 = df2.sort_index()
|
||||
|
||||
# Restore original values where repair failed (i.e. remove tag values)
|
||||
f = df2[data_cols].values==tag
|
||||
@@ -1137,7 +1385,7 @@ class TickerBase:
|
||||
return tz
|
||||
|
||||
def _fetch_ticker_tz(self, debug_mode, proxy, timeout):
|
||||
# Query Yahoo for basic price data just to get returned timezone
|
||||
# Query Yahoo for fast price data just to get returned timezone
|
||||
|
||||
params = {"range": "1d", "interval": "1d"}
|
||||
|
||||
@@ -1211,9 +1459,14 @@ class TickerBase:
|
||||
data = self._quote.info
|
||||
return data
|
||||
|
||||
@property
def fast_info(self):
    """Return the object held in ``self._fast_info``."""
    return self._fast_info
|
||||
|
||||
@property
|
||||
def basic_info(self):
|
||||
return self._basic_info
|
||||
print("WARNING: 'Ticker.basic_info' is renamed to 'Ticker.fast_info', hopefully purpose is clearer")
|
||||
return self.fast_info
|
||||
|
||||
def get_sustainability(self, proxy=None, as_dict=False):
|
||||
self._quote.proxy = proxy
|
||||
@@ -1480,7 +1733,6 @@ class TickerBase:
|
||||
|
||||
shares_data = json_data["timeseries"]["result"]
|
||||
if not "shares_out" in shares_data[0]:
|
||||
print(f"{self.ticker}: Yahoo did not return share count in date range {start} -> {end}")
|
||||
return None
|
||||
try:
|
||||
df = _pd.Series(shares_data[0]["shares_out"], index=_pd.to_datetime(shares_data[0]["timestamp"], unit="s"))
|
||||
@@ -1568,22 +1820,25 @@ class TickerBase:
|
||||
url = "{}/calendar/earnings?symbol={}&offset={}&size={}".format(
|
||||
_ROOT_URL_, self.ticker, page_offset, page_size)
|
||||
|
||||
data = self._data.cache_get(url=url, proxy=proxy).text
|
||||
|
||||
response = self._data.cache_get(url=url, proxy=proxy)
|
||||
data = response.text
|
||||
if "Will be right back" in data:
|
||||
raise RuntimeError("*** YAHOO! FINANCE IS CURRENTLY DOWN! ***\n"
|
||||
"Our engineers are working quickly to resolve "
|
||||
"the issue. Thank you for your patience.")
|
||||
|
||||
try:
|
||||
data = _pd.read_html(data)[0]
|
||||
except ValueError:
|
||||
if page_offset == 0:
|
||||
# Should not fail on first page
|
||||
if "Showing Earnings for:" in data:
|
||||
# Actually YF was successful, problem is company doesn't have earnings history
|
||||
dates = utils.empty_earnings_dates_df()
|
||||
break
|
||||
if "yf_html_pd" in dir(response):
|
||||
data = response.yf_html_pd[0]
|
||||
else:
|
||||
try:
|
||||
data = _pd.read_html(data)[0]
|
||||
except ValueError:
|
||||
if page_offset == 0:
|
||||
# Should not fail on first page
|
||||
if "Showing Earnings for:" in data:
|
||||
# Actually YF was successful, problem is company doesn't have earnings history
|
||||
dates = utils.empty_earnings_dates_df()
|
||||
break
|
||||
if dates is None:
|
||||
dates = data
|
||||
else:
|
||||
@@ -1639,6 +1894,6 @@ class TickerBase:
|
||||
|
||||
def get_history_metadata(self) -> dict:
|
||||
if self._history_metadata is None:
|
||||
raise RuntimeError("Metadata was never retrieved so far, "
|
||||
"call history() to retrieve it")
|
||||
# Request intraday data, because then Yahoo returns exchange schedule.
|
||||
self.history(period="1wk", interval="1h", prepost=True)
|
||||
return self._history_metadata
|
||||
|
||||
345
yfinance/data.py
345
yfinance/data.py
@@ -14,6 +14,8 @@ else:
|
||||
|
||||
import requests as requests
|
||||
import re
|
||||
import pandas as _pd
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from frozendict import frozendict
|
||||
|
||||
@@ -24,6 +26,8 @@ except ImportError:
|
||||
|
||||
cache_maxsize = 64
|
||||
|
||||
prune_session_cache = True
|
||||
|
||||
|
||||
def lru_cache_freezeargs(func):
|
||||
"""
|
||||
@@ -46,67 +50,38 @@ def lru_cache_freezeargs(func):
|
||||
return wrapped
|
||||
|
||||
|
||||
def decrypt_cryptojs_aes_stores(data):
|
||||
def _extract_extra_keys_from_stores(data):
    """Return the distinct values stored under the extra top-level keys.

    Every key of ``data`` other than "context" and "plugins" counts as an
    extra key. Their values are returned in key order, keeping only the
    first occurrence of each value.

    Args:
        data: dict to inspect; the extra values must be hashable.

    Returns:
        list of the unique extra values (empty when there are none).
    """
    seen_values = set()
    unique_values = []
    for key, value in data.items():
        if key in ("context", "plugins"):
            continue
        # Several keys may carry the same value - keep one of each.
        if value not in seen_values:
            seen_values.add(value)
            unique_values.append(value)
    return unique_values
|
||||
|
||||
|
||||
def decrypt_cryptojs_aes_stores(data, keys=None):
|
||||
encrypted_stores = data['context']['dispatcher']['stores']
|
||||
|
||||
password = None
|
||||
candidate_passwords = []
|
||||
if keys is not None:
|
||||
if not isinstance(keys, list):
|
||||
raise TypeError("'keys' must be list")
|
||||
candidate_passwords = keys
|
||||
else:
|
||||
candidate_passwords = []
|
||||
|
||||
if "_cs" in data and "_cr" in data:
|
||||
_cs = data["_cs"]
|
||||
_cr = data["_cr"]
|
||||
_cr = b"".join(int.to_bytes(i, length=4, byteorder="big", signed=True) for i in json.loads(_cr)["words"])
|
||||
password = hashlib.pbkdf2_hmac("sha1", _cs.encode("utf8"), _cr, 1, dklen=32).hex()
|
||||
else:
|
||||
# Currently assume one extra key in dict, which is password. Print error if
|
||||
# more extra keys detected.
|
||||
new_keys = [k for k in data.keys() if k not in ["context", "plugins"]]
|
||||
new_keys_values = set([data[k] for k in new_keys])
|
||||
|
||||
# Maybe multiple keys have same value - keep one of each
|
||||
new_keys2 = []
|
||||
new_keys2_values = set()
|
||||
for k in new_keys:
|
||||
v = data[k]
|
||||
if not v in new_keys2_values:
|
||||
new_keys2.append(k)
|
||||
new_keys2_values.add(v)
|
||||
|
||||
l = len(new_keys)
|
||||
if l == 0:
|
||||
return None
|
||||
elif l == 1 and isinstance(data[new_keys[0]], str):
|
||||
password_key = new_keys[0]
|
||||
# else:
|
||||
# msg = "Yahoo has again changed data format, yfinance now unsure which key(s) is for decryption:"
|
||||
# new_keys_pretty = {}
|
||||
# l = min(10, len(new_keys))
|
||||
# for i in range(0, l):
|
||||
# k = new_keys[i]
|
||||
# k_str = k if len(k) < 32 else k[:32-3]+"..."
|
||||
# v = data[k]
|
||||
# v_type = type(v)
|
||||
# v_str = str(v)
|
||||
# if len(v_str) > 256:
|
||||
# v_str = v_str[:256]+"..."
|
||||
# new_keys_pretty[k_str] = f"{v_str}' ({v_type})"
|
||||
# for k in new_keys_pretty:
|
||||
# msg += '\n' + f"'{k}' -> '{new_keys_pretty[k]}'"
|
||||
# if len(new_keys) > l:
|
||||
# d = len(new_keys) - l
|
||||
# msg += '\n' + "..."
|
||||
# msg += '\n' + f"{d} more options!"
|
||||
# raise Exception(msg)
|
||||
# password_key = new_keys[0]
|
||||
# password = data[password_key]
|
||||
|
||||
# The above attempt to smartly pick out decryption key has stopped working.
|
||||
# Fortunately the keys Yahoo use are currently hardcoded in their JSON:
|
||||
candidate_passwords += ["ad4d90b3c9f2e1d156ef98eadfa0ff93e4042f6960e54aa2a13f06f528e6b50ba4265a26a1fd5b9cd3db0d268a9c34e1d080592424309429a58bce4adc893c87", \
|
||||
"e9a8ab8e5620b712ebc2fb4f33d5c8b9c80c0d07e8c371911c785cf674789f1747d76a909510158a7b7419e86857f2d7abbd777813ff64840e4cbc514d12bcae",
|
||||
"6ae2523aeafa283dad746556540145bf603f44edbf37ad404d3766a8420bb5eb1d3738f52a227b88283cca9cae44060d5f0bba84b6a495082589f5fe7acbdc9e",
|
||||
"3365117c2a368ffa5df7313a4a84988f73926a86358e8eea9497c5ff799ce27d104b68e5f2fbffa6f8f92c1fef41765a7066fa6bcf050810a9c4c7872fd3ebf0"]
|
||||
|
||||
# candidate_passwords += [data[k] for k in new_keys] # don't do these, none work
|
||||
|
||||
encrypted_stores = b64decode(encrypted_stores)
|
||||
assert encrypted_stores[0:8] == b"Salted__"
|
||||
@@ -187,7 +162,7 @@ def decrypt_cryptojs_aes_stores(data):
|
||||
except:
|
||||
pass
|
||||
if not success:
|
||||
raise Exception("yfinance failed to decrypt Yahoo data response with hardcoded keys, contact developers")
|
||||
raise Exception("yfinance failed to decrypt Yahoo data response")
|
||||
|
||||
decoded_stores = json.loads(plaintext)
|
||||
return decoded_stores
|
||||
@@ -196,6 +171,13 @@ def decrypt_cryptojs_aes_stores(data):
|
||||
_SCRAPE_URL_ = 'https://finance.yahoo.com/quote'
|
||||
|
||||
|
||||
def enable_prune_session_cache():
    """Set the module-level ``prune_session_cache`` flag to True."""
    global prune_session_cache
    prune_session_cache = True
|
||||
def disable_prune_session_cache():
    """Set the module-level ``prune_session_cache`` flag to False."""
    global prune_session_cache
    prune_session_cache = False
|
||||
|
||||
class TickerData:
|
||||
"""
|
||||
Have one place to retrieve data from Yahoo API in order to ease caching and speed up operations
|
||||
@@ -207,6 +189,18 @@ class TickerData:
|
||||
self.ticker = ticker
|
||||
self._session = session or requests
|
||||
|
||||
def _check_requests_cache_hook(self):
    """Install or clear the response hook on a caching session.

    Does nothing when ``self._session`` has no ``cache`` attribute (not a
    caching session). Otherwise the module-level ``prune_session_cache``
    flag decides:

    * flag off: the session's "response" hook list is reset to empty.
      NOTE: this replaces the whole list, so any other response hooks
      registered on the session are dropped as well.
    * flag on: ``self._check_Yahoo_response`` is appended to the
      "response" hooks unless it is already present.
    """
    try:
        self._session.cache
    except AttributeError:
        # Not a caching session
        return

    if not prune_session_cache:
        self._session.hooks["response"] = []
        return

    response_hooks = self._session.hooks["response"]
    if self._check_Yahoo_response not in response_hooks:
        response_hooks.append(self._check_Yahoo_response)
|
||||
|
||||
def get(self, url, user_agent_headers=None, params=None, proxy=None, timeout=30):
|
||||
proxy = self._get_proxy(proxy)
|
||||
response = self._session.get(
|
||||
@@ -230,6 +224,198 @@ class TickerData:
|
||||
proxy = {"https": proxy}
|
||||
return proxy
|
||||
|
||||
def _get_decryption_keys_from_yahoo_js(self, soup):
    """Try to assemble the decryption password from a parsed Yahoo page.

    The ``root.App.main`` JSON embedded in the page is parsed, and the
    password is built by concatenating the values stored under four of its
    top-level keys. Two strategies are tried in order:

    1. use the keys that follow "plugins", when there are exactly four;
    2. download each ``main.*.js`` script referenced by the page and scan
       it for the four key names.

    Args:
        soup: parsed page; must provide ``find`` and ``find_all`` the way
            BeautifulSoup does.

    Returns:
        A one-element list holding the password, or an empty list when no
        complete set of four keys could be found.

    Raises:
        AttributeError: if the page has no ``root.App.main`` script or the
            assignment cannot be located inside it.
    """
    key_count = 4

    script_text = soup.find("script", string=re.compile("root.App.main")).text
    re_data = json.loads(re.search(r"root.App.main\s+=\s+(\{.*\})", script_text).group(1))
    re_data.pop("context", None)

    def join_values(names):
        # Concatenate the values stored under `names`; None as soon as one
        # of them is missing or empty.
        found = {}
        for name in names:
            value = re_data.get(name)
            if not value:
                return None
            found[name] = value
        return ''.join(found.values())

    # 1) attempt to get last 4 keys after plugins
    if re_data.get("plugins"):
        key_list = list(re_data.keys())
        sub_keys = key_list[key_list.index("plugins") + 1:]
        if len(sub_keys) == key_count:
            password = join_values(sub_keys)
            if password is not None:
                return [password]

    # 2) attempt scan main.js file approach to get keys
    re_keys = []
    prefix = "https://s.yimg.com/uc/finance/dd-site/js/main."
    tags = [tag['src'] for tag in soup.find_all('script') if prefix in tag.get('src', '')]
    for t in tags:
        response_js = self.cache_get(t)
        try:
            if response_js.status_code != 200:
                # Back off for a while before trying the next script.
                time.sleep(random.randrange(10, 20))
                continue
            r_data = response_js.content.decode("utf8")
            re_list = [
                x.group() for x in re.finditer(r"context.dispatcher.stores=JSON.parse((?:.*?\r?\n?)*)toString", r_data)
            ]
            for rl in re_list:
                re_sublist = [x.group() for x in re.finditer(r"t\[\"((?:.*?\r?\n?)*)\"\]", rl)]
                if len(re_sublist) == key_count:
                    re_keys = [sl.replace('t["', '').replace('"]', '') for sl in re_sublist]
                    break
        finally:
            # Always release the response, even if decoding/scanning fails.
            response_js.close()
        if len(re_keys) == key_count:
            break

    # Only build a password when the scan really produced the key names;
    # with no names there is nothing to join and the caller must see an
    # empty list so its fallback methods run.
    if len(re_keys) == key_count:
        password = join_values(re_keys)
        if password is not None:
            return [password]

    return []
|
||||
|
||||
def _gather_keys_from_response(self, response):
    """Collect candidate decryption keys for a Yahoo page response.

    The keys extracted by ``_get_decryption_keys_from_yahoo_js`` are used
    when there are any. Otherwise a warning is printed and two backup
    sources are combined:

    * the last four extra top-level values of the page's ``root.App.main``
      JSON, concatenated into one key;
    * every line of the key file published in the yfinance GitHub repo.

    Args:
        response: HTTP response whose ``content`` is the page HTML.

    Returns:
        list of candidate key strings.
    """
    # Gather decryption keys:
    soup = BeautifulSoup(response.content, "html.parser")
    keys = self._get_decryption_keys_from_yahoo_js(soup)
    if len(keys) > 0:
        return keys

    msg = "No decryption keys could be extracted from JS file."
    if "requests_cache" in str(type(response)):
        msg += " Try flushing your 'requests_cache', probably parsing old JS."
    print("WARNING: " + msg + " Falling back to backup decrypt methods.")

    keys = []
    try:
        # Parse the stores dict from the page itself; the backup key is
        # derived from its extra top-level entries.
        script_text = soup.find("script", string=re.compile("root.App.main")).text
        data = json.loads(re.search(r"root.App.main\s+=\s+(\{.*\})", script_text).group(1))
        extra_keys = _extract_extra_keys_from_stores(data)
        if extra_keys:
            keys = [''.join(extra_keys[-4:])]
    except (AttributeError, TypeError, ValueError):
        # Best effort only: script missing, JSON malformed or values not
        # strings - rely on the published key file below.
        pass
    #
    keys_url = "https://github.com/ranaroussi/yfinance/raw/main/yfinance/scrapers/yahoo-keys.txt"
    response_gh = self.cache_get(keys_url)
    keys += response_gh.text.splitlines()
    return keys
|
||||
|
||||
def _check_Yahoo_response(self, r, *args, **kwargs):
|
||||
# Parse the data returned by Yahoo to determine if corrupt/incomplete.
|
||||
# If bad, set 'status_code' to 204 "No content" , that stops it
|
||||
# entering a requests_cache.
|
||||
|
||||
# Because this involves parsing, the output is added to response object
|
||||
# with prefix "yf_" and reused elsewhere.
|
||||
|
||||
if not "yahoo.com/" in r.url:
|
||||
# Only check Yahoo responses
|
||||
return
|
||||
|
||||
attrs = dir(r)
|
||||
r_from_cache = "from_cache" in attrs and r.from_cache
|
||||
if "yf_data" in attrs or "yf_json" in attrs or "yf_html_pd" in attrs:
|
||||
# Have already parsed this response, successfully
|
||||
return
|
||||
|
||||
if "Will be right back" in r.text:
|
||||
# Simple check, no parsing needed
|
||||
r.status_code = 204
|
||||
return r
|
||||
|
||||
parse_failed = False
|
||||
r_modified = False
|
||||
|
||||
if "/ws/fundamentals-timeseries" in r.url:
|
||||
# Timeseries
|
||||
try:
|
||||
data = r.json()
|
||||
r.yf_json = data
|
||||
r_modified = True
|
||||
data["timeseries"]["result"]
|
||||
except:
|
||||
parse_failed = True
|
||||
elif "/finance/chart/" in r.url:
|
||||
# Prices
|
||||
try:
|
||||
data = r.json()
|
||||
r.yf_json = data
|
||||
r_modified = True
|
||||
if data["chart"]["error"] is not None:
|
||||
parse_failed = True
|
||||
except Exception:
|
||||
parse_failed = True
|
||||
elif "/finance/options/" in r.url:
|
||||
# Options
|
||||
if not "expirationDates" in r.text:
|
||||
# Parse will fail
|
||||
parse_failed = True
|
||||
elif "/finance/search?" in r.url:
|
||||
# News, can't be bothered to check
|
||||
return
|
||||
elif "/calendar/earnings?" in r.url:
|
||||
try:
|
||||
dfs = _pd.read_html(r.text)
|
||||
except ValueError as e:
|
||||
if "No tables found" in str(e):
|
||||
# Maybe this ticker doesn't have any earnings dates
|
||||
pass
|
||||
else:
|
||||
parse_failed = True
|
||||
except Exception as e:
|
||||
parse_failed = True
|
||||
else:
|
||||
r.yf_html_pd = dfs
|
||||
r_modified = True
|
||||
elif "root.App.main" in r.text:
|
||||
# JSON data stores
|
||||
try:
|
||||
json_str = r.text.split('root.App.main =')[1].split(
|
||||
'(this)')[0].split(';\n}')[0].strip()
|
||||
except IndexError:
|
||||
parse_failed = True
|
||||
|
||||
if not parse_failed:
|
||||
data = json.loads(json_str)
|
||||
|
||||
keys = self._gather_keys_from_response(r)
|
||||
|
||||
# Decrypt!
|
||||
stores = decrypt_cryptojs_aes_stores(data, keys)
|
||||
if stores is None:
|
||||
# Maybe Yahoo returned old format, not encrypted
|
||||
if "context" in data and "dispatcher" in data["context"]:
|
||||
stores = data['context']['dispatcher']['stores']
|
||||
if stores is None:
|
||||
# raise Exception(f"{self.ticker}: Failed to extract data stores from web request")
|
||||
print(f"{self.ticker}: Failed to decrypt/extract data stores from web request")
|
||||
parse_failed = True
|
||||
|
||||
if "yf_data" not in attrs:
|
||||
# if not parse_failed and "yf_data" not in attrs:
|
||||
r.yf_data = stores
|
||||
r_modified = True
|
||||
|
||||
if stores is not None and "QuoteSummaryStore" not in stores:
|
||||
parse_failed = True
|
||||
|
||||
else:
|
||||
return
|
||||
|
||||
if parse_failed:
|
||||
if not r_from_cache:
|
||||
r.status_code = 204 # No content
|
||||
r_modified = True
|
||||
|
||||
if r_modified:
|
||||
return r
|
||||
|
||||
@lru_cache_freezeargs
|
||||
@lru_cache(maxsize=cache_maxsize)
|
||||
def get_json_data_stores(self, sub_page: str = None, proxy=None) -> dict:
|
||||
@@ -241,29 +427,46 @@ class TickerData:
|
||||
else:
|
||||
ticker_url = "{}/{}".format(_SCRAPE_URL_, self.ticker)
|
||||
|
||||
html = self.get(url=ticker_url, proxy=proxy).text
|
||||
# Ensure hook ready to intercept get responses
|
||||
self._check_requests_cache_hook()
|
||||
|
||||
# The actual json-data for stores is in a javascript assignment in the webpage
|
||||
try:
|
||||
json_str = html.split('root.App.main =')[1].split(
|
||||
'(this)')[0].split(';\n}')[0].strip()
|
||||
except IndexError:
|
||||
# Fetch failed, probably because Yahoo spam triggered
|
||||
return {}
|
||||
response = self.get(url=ticker_url, proxy=proxy)
|
||||
|
||||
data = json.loads(json_str)
|
||||
if "yf_data" in dir(response):
|
||||
# _check_requests_cache_hook() already successfully extracted & decrypted
|
||||
stores = response.yf_data
|
||||
else:
|
||||
# Extract JSON and decrypt
|
||||
html = response.text
|
||||
|
||||
stores = decrypt_cryptojs_aes_stores(data)
|
||||
if stores is None:
|
||||
# Maybe Yahoo returned old format, not encrypted
|
||||
if "context" in data and "dispatcher" in data["context"]:
|
||||
stores = data['context']['dispatcher']['stores']
|
||||
if stores is None:
|
||||
raise Exception(f"{self.ticker}: Failed to extract data stores from web request")
|
||||
# The actual json-data for stores is in a javascript assignment in the webpage
|
||||
try:
|
||||
json_str = html.split('root.App.main =')[1].split(
|
||||
'(this)')[0].split(';\n}')[0].strip()
|
||||
except IndexError:
|
||||
# Problem with data so clear from session cache
|
||||
# self.session_cache_prune_url(ticker_url)
|
||||
# Then exit
|
||||
return {}
|
||||
|
||||
data = json.loads(json_str)
|
||||
|
||||
keys = self._gather_keys_from_response(response)
|
||||
|
||||
# Decrypt!
|
||||
stores = decrypt_cryptojs_aes_stores(data, keys)
|
||||
if stores is None:
|
||||
# Maybe Yahoo returned old format, not encrypted
|
||||
if "context" in data and "dispatcher" in data["context"]:
|
||||
stores = data['context']['dispatcher']['stores']
|
||||
if stores is None:
|
||||
raise Exception(f"{self.ticker}: Failed to extract data stores from web request")
|
||||
|
||||
# return data
|
||||
new_data = json.dumps(stores).replace('{}', 'null')
|
||||
new_data = re.sub(
|
||||
r'{[\'|\"]raw[\'|\"]:(.*?),(.*?)}', r'\1', new_data)
|
||||
|
||||
return json.loads(new_data)
|
||||
json_data = json.loads(new_data)
|
||||
|
||||
return json_data
|
||||
|
||||
@@ -29,7 +29,7 @@ from . import Ticker, utils
|
||||
from . import shared
|
||||
|
||||
|
||||
def download(tickers, start=None, end=None, actions=False, threads=True, ignore_tz=False,
|
||||
def download(tickers, start=None, end=None, actions=False, threads=True, ignore_tz=None,
|
||||
group_by='column', auto_adjust=False, back_adjust=False, repair=False, keepna=False,
|
||||
progress=True, period="max", show_errors=True, interval="1d", prepost=False,
|
||||
proxy=None, rounding=False, timeout=10):
|
||||
@@ -68,7 +68,7 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
|
||||
How many threads to use for mass downloading. Default is True
|
||||
ignore_tz: bool
|
||||
When combining from different timezones, ignore that part of datetime.
|
||||
Default is False
|
||||
Default depends on interval. Intraday = False. Day+ = True.
|
||||
proxy: str
|
||||
Optional. Proxy server URL scheme. Default is None
|
||||
rounding: bool
|
||||
@@ -80,6 +80,14 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
|
||||
seconds. (Can also be a fraction of a second e.g. 0.01)
|
||||
"""
|
||||
|
||||
if ignore_tz is None:
|
||||
# Set default value depending on interval
|
||||
if interval[1:] in ['m', 'h']:
|
||||
# Intraday
|
||||
ignore_tz = False
|
||||
else:
|
||||
ignore_tz = True
|
||||
|
||||
# create ticker list
|
||||
tickers = tickers if isinstance(
|
||||
tickers, (list, set, tuple)) else tickers.replace(',', ' ').split()
|
||||
|
||||
@@ -7,16 +7,14 @@ from yfinance import utils
|
||||
from yfinance.data import TickerData
|
||||
|
||||
|
||||
info_retired_keys_price = {"currentPrice", "dayHigh", "dayLow", "open", "previousClose", "volume"}
|
||||
info_retired_keys_price = {"currentPrice", "dayHigh", "dayLow", "open", "previousClose", "volume", "volume24Hr"}
|
||||
info_retired_keys_price.update({"regularMarket"+s for s in ["DayHigh", "DayLow", "Open", "PreviousClose", "Price", "Volume"]})
|
||||
info_retired_keys_price.update({"fiftyTwoWeekLow", "fiftyTwoWeekHigh", "fiftyTwoWeekChange", "fiftyDayAverage", "twoHundredDayAverage"})
|
||||
info_retired_keys_price.update({"fiftyTwoWeekLow", "fiftyTwoWeekHigh", "fiftyTwoWeekChange", "52WeekChange", "fiftyDayAverage", "twoHundredDayAverage"})
|
||||
info_retired_keys_price.update({"averageDailyVolume10Day", "averageVolume10days", "averageVolume"})
|
||||
info_retired_keys_exchange = {"currency", "exchange", "exchangeTimezoneName", "exchangeTimezoneShortName"}
|
||||
info_retired_keys_exchange = {"currency", "exchange", "exchangeTimezoneName", "exchangeTimezoneShortName", "quoteType"}
|
||||
info_retired_keys_marketCap = {"marketCap"}
|
||||
info_retired_keys_symbol = {"symbol"}
|
||||
info_retired_keys = info_retired_keys_price | info_retired_keys_exchange | info_retired_keys_marketCap | info_retired_keys_symbol
|
||||
#
|
||||
info_retired_keys = []
|
||||
|
||||
|
||||
PRUNE_INFO = True
|
||||
@@ -46,16 +44,16 @@ class InfoDictWrapper(MutableMapping):
|
||||
|
||||
def __getitem__(self, k):
|
||||
if k in info_retired_keys_price:
|
||||
print(f"Price data removed from info. Use Ticker.basic_info or history() instead")
|
||||
print(f"Price data removed from info (key='{k}'). Use Ticker.fast_info or history() instead")
|
||||
return None
|
||||
elif k in info_retired_keys_exchange:
|
||||
print(f"Exchange data removed from info. Use Ticker.basic_info or Ticker.get_history_metadata() instead")
|
||||
print(f"Exchange data removed from info (key='{k}'). Use Ticker.fast_info or Ticker.get_history_metadata() instead")
|
||||
return None
|
||||
elif k in info_retired_keys_marketCap:
|
||||
print(f"Market cap removed from info. Use Ticker.basic_info instead")
|
||||
print(f"Market cap removed from info (key='{k}'). Use Ticker.fast_info instead")
|
||||
return None
|
||||
elif k in info_retired_keys_symbol:
|
||||
print(f"Symbol removed from info. You know this already")
|
||||
print(f"Symbol removed from info (key='{k}'). You know this already")
|
||||
return None
|
||||
return self.info[self._keytransform(k)]
|
||||
|
||||
@@ -83,6 +81,7 @@ class Quote:
|
||||
self.proxy = proxy
|
||||
|
||||
self._info = None
|
||||
self._retired_info = None
|
||||
self._sustainability = None
|
||||
self._recommendations = None
|
||||
self._calendar = None
|
||||
@@ -201,10 +200,14 @@ class Quote:
|
||||
|
||||
# Delete redundant info[] keys, because values can be accessed faster
|
||||
# elsewhere - e.g. price keys. Hope is reduces Yahoo spam effect.
|
||||
if PRUNE_INFO:
|
||||
for k in info_retired_keys:
|
||||
if k in self._info:
|
||||
# But record the dropped keys, because in rare cases they are needed.
|
||||
self._retired_info = {}
|
||||
for k in info_retired_keys:
|
||||
if k in self._info:
|
||||
self._retired_info[k] = self._info[k]
|
||||
if PRUNE_INFO:
|
||||
del self._info[k]
|
||||
if PRUNE_INFO:
|
||||
# InfoDictWrapper will explain how to access above data elsewhere
|
||||
self._info = InfoDictWrapper(self._info)
|
||||
|
||||
@@ -280,11 +283,14 @@ class Quote:
|
||||
|
||||
json_str = self._data.cache_get(url=url, proxy=proxy).text
|
||||
json_data = json.loads(json_str)
|
||||
key_stats = json_data["timeseries"]["result"][0]
|
||||
if k not in key_stats:
|
||||
# Yahoo website prints N/A, indicates Yahoo lacks necessary data to calculate
|
||||
try:
|
||||
key_stats = json_data["timeseries"]["result"][0]
|
||||
if k not in key_stats:
|
||||
# Yahoo website prints N/A, indicates Yahoo lacks necessary data to calculate
|
||||
v = None
|
||||
else:
|
||||
# Select most recent (last) raw value in list:
|
||||
v = key_stats[k][-1]["reportedValue"]["raw"]
|
||||
except Exception:
|
||||
v = None
|
||||
else:
|
||||
# Select most recent (last) raw value in list:
|
||||
v = key_stats[k][-1]["reportedValue"]["raw"]
|
||||
self._info[k] = v
|
||||
|
||||
5
yfinance/scrapers/yahoo-keys.txt
Normal file
5
yfinance/scrapers/yahoo-keys.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
daf93e37cbf219cd4c1f3f74ec4551265ec5565b99e8c9322dccd6872941cf13c818cbb88cba6f530e643b4e2329b17ec7161f4502ce6a02bb0dbbe5fc0d0474
|
||||
ad4d90b3c9f2e1d156ef98eadfa0ff93e4042f6960e54aa2a13f06f528e6b50ba4265a26a1fd5b9cd3db0d268a9c34e1d080592424309429a58bce4adc893c87
|
||||
e9a8ab8e5620b712ebc2fb4f33d5c8b9c80c0d07e8c371911c785cf674789f1747d76a909510158a7b7419e86857f2d7abbd777813ff64840e4cbc514d12bcae
|
||||
6ae2523aeafa283dad746556540145bf603f44edbf37ad404d3766a8420bb5eb1d3738f52a227b88283cca9cae44060d5f0bba84b6a495082589f5fe7acbdc9e
|
||||
3365117c2a368ffa5df7313a4a84988f73926a86358e8eea9497c5ff799ce27d104b68e5f2fbffa6f8f92c1fef41765a7066fa6bcf050810a9c4c7872fd3ebf0
|
||||
@@ -133,10 +133,6 @@ class Ticker(TickerBase):
|
||||
def shares(self) -> _pd.DataFrame :
|
||||
return self.get_shares()
|
||||
|
||||
@property
|
||||
def market_cap(self) -> float:
|
||||
return self.calc_market_cap()
|
||||
|
||||
@property
|
||||
def info(self) -> dict:
|
||||
return self.get_info()
|
||||
|
||||
@@ -300,6 +300,11 @@ def camel2title(strings: List[str], sep: str = ' ', acronyms: Optional[List[str]
|
||||
return strings
|
||||
|
||||
|
||||
def snake_case_2_camelCase(s):
    """Convert a ``snake_case`` string to ``camelCase``.

    The text before the first underscore is kept as-is; every following
    underscore-separated piece is passed through ``str.title()`` and appended.
    """
    head, *tail = s.split('_')
    return head + ''.join(piece.title() for piece in tail)
|
||||
|
||||
|
||||
def _parse_user_dt(dt, exchange_tz):
|
||||
if isinstance(dt, int):
|
||||
# Should already be epoch, test with conversion:
|
||||
@@ -443,6 +448,35 @@ def set_df_tz(df, interval, tz):
|
||||
return df
|
||||
|
||||
|
||||
def fix_Yahoo_returning_prepost_unrequested(quotes, interval, metadata):
|
||||
# Sometimes Yahoo returns post-market data despite not requesting it.
|
||||
# Normally happens on half-day early closes.
|
||||
#
|
||||
# And sometimes returns pre-market data despite not requesting it.
|
||||
# E.g. some London tickers.
|
||||
tps_df = metadata["tradingPeriods"]
|
||||
tps_df["_date"] = tps_df.index.date
|
||||
quotes["_date"] = quotes.index.date
|
||||
idx = quotes.index.copy()
|
||||
quotes = quotes.merge(tps_df, how="left", validate="many_to_one")
|
||||
quotes.index = idx
|
||||
# "end" = end of regular trading hours (including any auction)
|
||||
f_drop = quotes.index >= quotes["end"]
|
||||
f_drop = f_drop | (quotes.index < quotes["start"])
|
||||
if f_drop.any():
|
||||
# When printing report, ignore rows that were already NaNs:
|
||||
f_na = quotes[["Open","Close"]].isna().all(axis=1)
|
||||
n_nna = quotes.shape[0] - _np.sum(f_na)
|
||||
n_drop_nna = _np.sum(f_drop & ~f_na)
|
||||
quotes_dropped = quotes[f_drop]
|
||||
# if debug and n_drop_nna > 0:
|
||||
# print(f"Dropping {n_drop_nna}/{n_nna} intervals for falling outside regular trading hours")
|
||||
quotes = quotes[~f_drop]
|
||||
metadata["tradingPeriods"] = tps_df.drop(["_date"], axis=1)
|
||||
quotes = quotes.drop(["_date", "start", "end"], axis=1)
|
||||
return quotes
|
||||
|
||||
|
||||
def fix_Yahoo_returning_live_separate(quotes, interval, tz_exchange):
|
||||
# Yahoo bug fix. If market is open today then Yahoo normally returns
|
||||
# todays data as a separate row from rest-of week/month interval in above row.
|
||||
@@ -656,6 +690,71 @@ def is_valid_timezone(tz: str) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def format_history_metadata(md):
|
||||
if not isinstance(md, dict):
|
||||
return md
|
||||
if len(md) == 0:
|
||||
return md
|
||||
|
||||
tz = md["exchangeTimezoneName"]
|
||||
|
||||
for k in ["firstTradeDate", "regularMarketTime"]:
|
||||
if k in md:
|
||||
md[k] = _pd.to_datetime(md[k], unit='s', utc=True).tz_convert(tz)
|
||||
|
||||
if "currentTradingPeriod" in md:
|
||||
for m in ["regular", "pre", "post"]:
|
||||
if m in md["currentTradingPeriod"]:
|
||||
for t in ["start", "end"]:
|
||||
md["currentTradingPeriod"][m][t] = \
|
||||
_pd.to_datetime(md["currentTradingPeriod"][m][t], unit='s', utc=True).tz_convert(tz)
|
||||
del md["currentTradingPeriod"][m]["gmtoffset"]
|
||||
del md["currentTradingPeriod"][m]["timezone"]
|
||||
|
||||
if "tradingPeriods" in md:
|
||||
if md["tradingPeriods"] == {"pre":[], "post":[]}:
|
||||
del md["tradingPeriods"]
|
||||
|
||||
if "tradingPeriods" in md:
|
||||
tps = md["tradingPeriods"]
|
||||
if isinstance(tps, list):
|
||||
# Only regular times
|
||||
regs_dict = [tps[i][0] for i in range(len(tps))]
|
||||
pres_dict = None
|
||||
posts_dict = None
|
||||
elif isinstance(tps, dict):
|
||||
# Includes pre- and post-market
|
||||
pres_dict = [tps["pre"][i][0] for i in range(len(tps["pre"]))]
|
||||
posts_dict = [tps["post"][i][0] for i in range(len(tps["post"]))]
|
||||
regs_dict = [tps["regular"][i][0] for i in range(len(tps["regular"]))]
|
||||
else:
|
||||
raise Exception()
|
||||
|
||||
def _dict_to_table(d):
|
||||
df = _pd.DataFrame.from_dict(d).drop(["timezone", "gmtoffset"], axis=1)
|
||||
df["end"] = _pd.to_datetime(df["end"], unit='s', utc=True).dt.tz_convert(tz)
|
||||
df["start"] = _pd.to_datetime(df["start"], unit='s', utc=True).dt.tz_convert(tz)
|
||||
df.index = _pd.to_datetime(df["start"].dt.date)
|
||||
df.index = df.index.tz_localize(tz)
|
||||
return df
|
||||
|
||||
df = _dict_to_table(regs_dict)
|
||||
df_cols = ["start", "end"]
|
||||
if pres_dict is not None:
|
||||
pre_df = _dict_to_table(pres_dict)
|
||||
df = df.merge(pre_df.rename(columns={"start":"pre_start", "end":"pre_end"}), left_index=True, right_index=True)
|
||||
df_cols = ["pre_start", "pre_end"]+df_cols
|
||||
if posts_dict is not None:
|
||||
post_df = _dict_to_table(posts_dict)
|
||||
df = df.merge(post_df.rename(columns={"start":"post_start", "end":"post_end"}), left_index=True, right_index=True)
|
||||
df_cols = df_cols+["post_start", "post_end"]
|
||||
df = df[df_cols]
|
||||
df.index.name = "Date"
|
||||
|
||||
md["tradingPeriods"] = df
|
||||
|
||||
return md
|
||||
|
||||
class ProgressBar:
|
||||
def __init__(self, iterations, text='completed'):
|
||||
self.text = text
|
||||
@@ -718,7 +817,14 @@ class _KVStore:
|
||||
with self._cache_mutex:
|
||||
self.conn = _sqlite3.connect(filename, timeout=10, check_same_thread=False)
|
||||
self.conn.execute('pragma journal_mode=wal')
|
||||
self.conn.execute('create table if not exists "kv" (key TEXT primary key, value TEXT) without rowid')
|
||||
try:
|
||||
self.conn.execute('create table if not exists "kv" (key TEXT primary key, value TEXT) without rowid')
|
||||
except Exception as e:
|
||||
if 'near "without": syntax error' in str(e):
|
||||
# "without rowid" requires sqlite 3.8.2. Older versions will raise exception
|
||||
self.conn.execute('create table if not exists "kv" (key TEXT primary key, value TEXT)')
|
||||
else:
|
||||
raise
|
||||
self.conn.commit()
|
||||
_atexit.register(self.close)
|
||||
|
||||
|
||||
@@ -1 +1 @@
|
||||
version = "0.2.5"
|
||||
version = "0.2.10b2"
|
||||
|
||||
Reference in New Issue
Block a user