Compare commits
55 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b286797e8c | ||
|
|
b306bef350 | ||
|
|
61c89660df | ||
|
|
31af2ab1d5 | ||
|
|
21c380fa61 | ||
|
|
e0000cd787 | ||
|
|
11d43eb1a1 | ||
|
|
509a109f29 | ||
|
|
b0639409a3 | ||
|
|
aba81eedc2 | ||
|
|
9268fcfa76 | ||
|
|
6055566de8 | ||
|
|
398a19a855 | ||
|
|
e771cfabb6 | ||
|
|
5b676f803b | ||
|
|
eb5c50d5c7 | ||
|
|
1cb0b215c4 | ||
|
|
50dcb2ce5a | ||
|
|
1ce9ce2784 | ||
|
|
cd4816e289 | ||
|
|
27e9ce7542 | ||
|
|
02c1c60f3b | ||
|
|
27ea9472c1 | ||
|
|
801f58790a | ||
|
|
080834e3ce | ||
|
|
4e7b2094d0 | ||
|
|
c72e04bf55 | ||
|
|
abbe4c3a2f | ||
|
|
9e21b85043 | ||
|
|
b44917b7f9 | ||
|
|
6f78dd6e6b | ||
|
|
593dc8fcee | ||
|
|
b94baa4cc5 | ||
|
|
1a054135fb | ||
|
|
4e2253a406 | ||
|
|
9af7ec0a4e | ||
|
|
8624216e21 | ||
|
|
954e71d19c | ||
|
|
5124059422 | ||
|
|
d18cd6f42f | ||
|
|
c20211a06c | ||
|
|
cdfe7d0d2d | ||
|
|
e57647c1d7 | ||
|
|
762abd8bba | ||
|
|
d1ea402792 | ||
|
|
65f65b1776 | ||
|
|
9388c29207 | ||
|
|
9f91f4b180 | ||
|
|
d9bfd29113 | ||
|
|
4711aab7b3 | ||
|
|
30d20c1206 | ||
|
|
83b177b7fb | ||
|
|
b96319dd64 | ||
|
|
74b88dc62c | ||
|
|
d30a2a0915 |
2
.github/ISSUE_TEMPLATE/bug_report.md
vendored
2
.github/ISSUE_TEMPLATE/bug_report.md
vendored
@@ -38,7 +38,7 @@ Yahoo Finance free service has rate-limiting depending on request type - roughly
|
||||
**Delete these instructions** and replace with your bug report, providing the following as best you can:
|
||||
|
||||
- Simple code that reproduces your problem, that we can copy-paste-run.
|
||||
- Run code with [debug logging enabled](https://github.com/ranaroussi/yfinance/tree/dev#logging) and post the full output.
|
||||
- Run code with [debug logging enabled](https://github.com/ranaroussi/yfinance#logging) and post the full output.
|
||||
- If you think `yfinance` is returning bad data, give us proof.
|
||||
- `yfinance` version and Python version.
|
||||
- Operating system type.
|
||||
|
||||
14
.github/ISSUE_TEMPLATE/feature_request.md
vendored
14
.github/ISSUE_TEMPLATE/feature_request.md
vendored
@@ -1,14 +0,0 @@
|
||||
---
|
||||
name: Feature request
|
||||
about: Request a new feature
|
||||
title: ''
|
||||
labels: ''
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
**Describe the problem**
|
||||
|
||||
**Describe the solution**
|
||||
|
||||
**Additional context**
|
||||
4
.github/workflows/python-publish.yml
vendored
4
.github/workflows/python-publish.yml
vendored
@@ -13,9 +13,9 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v2
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.x'
|
||||
- name: Install dependencies
|
||||
|
||||
@@ -1,20 +1,32 @@
|
||||
Change Log
|
||||
===========
|
||||
|
||||
0.2.19b4
|
||||
--------
|
||||
Fix `download` logging #1541
|
||||
Fix corrupt tkr-tz-csv halting code #1528
|
||||
0.2.24
|
||||
------
|
||||
Fix info[] missing values #1603
|
||||
|
||||
0.2.19b3
|
||||
-------
|
||||
Improve logging messages #1522
|
||||
Price fixes #1523
|
||||
0.2.23
|
||||
------
|
||||
Fix 'Unauthorized' error #1595
|
||||
|
||||
0.2.19b1 - beta
|
||||
-------
|
||||
Optimise Ticker.history #1514
|
||||
Logging module #1493
|
||||
0.2.22
|
||||
------
|
||||
Fix unhandled 'sqlite3.DatabaseError' #1574
|
||||
|
||||
0.2.21
|
||||
------
|
||||
Fix financials tables #1568
|
||||
Price repair update: fix Yahoo messing up dividend and split adjustments #1543
|
||||
Fix logging behaviour #1562
|
||||
Fix merge future div/split into prices #1567
|
||||
|
||||
0.2.20
|
||||
------
|
||||
Switch to `logging` module #1493 #1522 #1541
|
||||
Price history:
|
||||
- optimise #1514
|
||||
- fixes #1523
|
||||
- fix TZ-cache corruption #1528
|
||||
|
||||
0.2.18
|
||||
------
|
||||
|
||||
57
README.md
57
README.md
@@ -42,11 +42,6 @@ Yahoo! finance API is intended for personal use only.**
|
||||
|
||||
---
|
||||
|
||||
## News [2023-01-27]
|
||||
Since December 2022 Yahoo has been encrypting the web data that `yfinance` scrapes for non-market data. Fortunately the decryption keys are available, although Yahoo moved/changed them several times hence `yfinance` breaking several times. `yfinance` is now better prepared for any future changes by Yahoo.
|
||||
|
||||
Why is Yahoo doing this? We don't know. Is it to stop scrapers? Maybe, so we've implemented changes to reduce load on Yahoo. In December we rolled out version 0.2 with optimised scraping. ~Then in 0.2.6 introduced `Ticker.fast_info`, providing much faster access to some `info` elements wherever possible e.g. price stats and forcing users to switch (sorry but we think necessary). `info` will continue to exist for as long as there are elements without a fast alternative.~ `info` now fixed and much faster than before.
|
||||
|
||||
## Quick Start
|
||||
|
||||
### The Ticker module
|
||||
@@ -74,9 +69,6 @@ msft.splits
|
||||
msft.capital_gains # only for mutual funds & etfs
|
||||
|
||||
# show share count
|
||||
# - yearly summary:
|
||||
msft.shares
|
||||
# - accurate time-series count:
|
||||
msft.get_shares_full(start="2022-01-01", end=None)
|
||||
|
||||
# show financials:
|
||||
@@ -96,25 +88,6 @@ msft.major_holders
|
||||
msft.institutional_holders
|
||||
msft.mutualfund_holders
|
||||
|
||||
# show earnings
|
||||
msft.earnings
|
||||
msft.quarterly_earnings
|
||||
|
||||
# show sustainability
|
||||
msft.sustainability
|
||||
|
||||
# show analysts recommendations
|
||||
msft.recommendations
|
||||
msft.recommendations_summary
|
||||
# show analysts other work
|
||||
msft.analyst_price_target
|
||||
msft.revenue_forecasts
|
||||
msft.earnings_forecasts
|
||||
msft.earnings_trend
|
||||
|
||||
# show next event (earnings, etc)
|
||||
msft.calendar
|
||||
|
||||
# Show future and historic earnings dates, returns at most next 4 quarters and last 8 quarters by default.
|
||||
# Note: If more are needed use msft.get_earnings_dates(limit=XX) with increased limit argument.
|
||||
msft.earnings_dates
|
||||
@@ -171,31 +144,14 @@ To download price history into one table:
|
||||
|
||||
```python
|
||||
import yfinance as yf
|
||||
data = yf.download("SPY AAPL", start="2017-01-01", end="2017-04-30")
|
||||
data = yf.download("SPY AAPL", period="1mo")
|
||||
```
|
||||
|
||||
`yf.download()` and `Ticker.history()` have many options for configuring fetching and processing, e.g.:
|
||||
|
||||
```python
|
||||
yf.download(tickers = "SPY AAPL", # list of tickers
|
||||
period = "1y", # time period
|
||||
interval = "1d", # trading interval
|
||||
prepost = False, # download pre/post market hours data?
|
||||
repair = True) # repair obvious price errors e.g. 100x?
|
||||
```
|
||||
|
||||
Review the [Wiki](https://github.com/ranaroussi/yfinance/wiki) for more options and detail.
|
||||
#### `yf.download()` and `Ticker.history()` have many options for configuring fetching and processing. [Review the Wiki](https://github.com/ranaroussi/yfinance/wiki) for more options and detail.
|
||||
|
||||
### Logging
|
||||
|
||||
`yfinance` now uses the `logging` module. To control the detail of printed messages you simply change the level:
|
||||
```
|
||||
import logging
|
||||
logger = logging.getLogger('yfinance')
|
||||
logger.setLevel(logging.ERROR) # default: only print errors
|
||||
logger.setLevel(logging.CRITICAL) # disable printing
|
||||
logger.setLevel(logging.DEBUG) # verbose: print errors & debug info
|
||||
```
|
||||
`yfinance` now uses the `logging` module to handle messages; the default behaviour is to print only errors. If debugging, use `yf.enable_debug_mode()` to switch logging to debug with custom formatting.
|
||||
|
||||
### Smarter scraping
|
||||
|
||||
@@ -222,7 +178,7 @@ class CachedLimiterSession(CacheMixin, LimiterMixin, Session):
|
||||
pass
|
||||
|
||||
session = CachedLimiterSession(
|
||||
limiter=Limiter(RequestRate(2, Duration.SECOND*5), # max 2 requests per 5 seconds
|
||||
limiter=Limiter(RequestRate(2, Duration.SECOND*5)), # max 2 requests per 5 seconds
|
||||
bucket_class=MemoryQueueBucket,
|
||||
backend=SQLiteCache("yfinance.cache"),
|
||||
)
|
||||
@@ -282,6 +238,11 @@ Install `yfinance` using `pip`:
|
||||
$ pip install yfinance --upgrade --no-cache-dir
|
||||
```
|
||||
|
||||
Test new features by installing betas, and provide feedback in the [corresponding Discussion](https://github.com/ranaroussi/yfinance/discussions):
|
||||
``` {.sourceCode .bash}
|
||||
$ pip install yfinance --upgrade --no-cache-dir --pre
|
||||
```
|
||||
|
||||
To install `yfinance` using `conda`, see
|
||||
[this](https://anaconda.org/ranaroussi/yfinance).
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
{% set name = "yfinance" %}
|
||||
{% set version = "0.2.19b4" %}
|
||||
{% set version = "0.2.24" %}
|
||||
|
||||
package:
|
||||
name: "{{ name|lower }}"
|
||||
|
||||
3
setup.py
3
setup.py
@@ -63,9 +63,8 @@ setup(
|
||||
'requests>=2.26', 'multitasking>=0.0.7',
|
||||
'lxml>=4.9.1', 'appdirs>=1.4.4', 'pytz>=2022.5',
|
||||
'frozendict>=2.3.4',
|
||||
# 'pycryptodome>=3.6.6',
|
||||
'cryptography>=3.3.2',
|
||||
'beautifulsoup4>=4.11.1', 'html5lib>=1.1'],
|
||||
# Note: Pandas.read_html() needs html5lib & beautifulsoup4
|
||||
entry_points={
|
||||
'console_scripts': [
|
||||
'sample=sample:main',
|
||||
|
||||
@@ -26,11 +26,16 @@ class CachedLimiterSession(CacheMixin, LimiterMixin, Session):
|
||||
from pyrate_limiter import Duration, RequestRate, Limiter
|
||||
history_rate = RequestRate(1, Duration.SECOND*2)
|
||||
limiter = Limiter(history_rate)
|
||||
cache_fp = os.path.join(_ad.user_cache_dir(), "py-yfinance", "unittests-cache")
|
||||
if os.path.isfile(cache_fp + '.sqlite'):
|
||||
# Delete local cache if older than 1 day:
|
||||
mod_dt = _dt.datetime.fromtimestamp(os.path.getmtime(cache_fp + '.sqlite'))
|
||||
if mod_dt.date() < _dt.date.today():
|
||||
os.remove(cache_fp + '.sqlite')
|
||||
session_gbl = CachedLimiterSession(
|
||||
limiter=limiter,
|
||||
bucket_class=MemoryQueueBucket,
|
||||
backend=SQLiteCache(os.path.join(_ad.user_cache_dir(), "py-yfinance", "unittests-cache"),
|
||||
expire_after=_dt.timedelta(hours=1)),
|
||||
backend=SQLiteCache(cache_fp, expire_after=_dt.timedelta(hours=1)),
|
||||
)
|
||||
# Use this instead if only want rate-limiting:
|
||||
# from requests_ratelimiter import LimiterSession
|
||||
|
||||
23
tests/data/4063-T-bad-stock-split-fixed.csv
Normal file
23
tests/data/4063-T-bad-stock-split-fixed.csv
Normal file
@@ -0,0 +1,23 @@
|
||||
Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits
|
||||
2023-04-14 00:00:00+09:00,4126,4130,4055,4129,4129,7459400,0,0
|
||||
2023-04-13 00:00:00+09:00,4064,4099,4026,4081,4081,5160200,0,0
|
||||
2023-04-12 00:00:00+09:00,3968,4084,3966,4064,4064,6372000,0,0
|
||||
2023-04-11 00:00:00+09:00,3990,4019,3954,3960,3960,6476500,0,0
|
||||
2023-04-10 00:00:00+09:00,3996,4009,3949,3964,3964,3485200,0,0
|
||||
2023-04-07 00:00:00+09:00,3897,3975,3892,3953,3953,4554700,0,0
|
||||
2023-04-06 00:00:00+09:00,4002,4004,3920,3942,3942,8615200,0,0
|
||||
2023-04-05 00:00:00+09:00,4150,4150,4080,4088,4088,6063700,0,0
|
||||
2023-04-04 00:00:00+09:00,4245,4245,4144,4155,4155,6780600,0,0
|
||||
2023-04-03 00:00:00+09:00,4250,4259,4162,4182,4182,7076800,0,0
|
||||
2023-03-31 00:00:00+09:00,4229,4299,4209,4275,4275,9608400,0,0
|
||||
2023-03-30 00:00:00+09:00,4257,4268,4119,4161,4161,5535200,55,5
|
||||
2023-03-29 00:00:00+09:00,4146,4211,4146,4206,4151,6514500,0,0
|
||||
2023-03-28 00:00:00+09:00,4200,4207,4124,4142,4087.837109375,4505500,0,0
|
||||
2023-03-27 00:00:00+09:00,4196,4204,4151,4192,4137.183203125,5959500,0,0
|
||||
2023-03-24 00:00:00+09:00,4130,4187,4123,4177,4122.379296875,8961500,0,0
|
||||
2023-03-23 00:00:00+09:00,4056,4106,4039,4086,4032.569140625,5480000,0,0
|
||||
2023-03-22 00:00:00+09:00,4066,4128,4057,4122,4068.0984375,8741500,0,0
|
||||
2023-03-20 00:00:00+09:00,4000,4027,3980,3980,3927.95546875,7006500,0,0
|
||||
2023-03-17 00:00:00+09:00,4018,4055,4016,4031,3978.28828125,6961500,0,0
|
||||
2023-03-16 00:00:00+09:00,3976,4045,3972,4035,3982.236328125,5019000,0,0
|
||||
2023-03-15 00:00:00+09:00,4034,4050,4003,4041,3988.1578125,6122000,0,0
|
||||
|
23
tests/data/4063-T-bad-stock-split.csv
Normal file
23
tests/data/4063-T-bad-stock-split.csv
Normal file
@@ -0,0 +1,23 @@
|
||||
Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits
|
||||
2023-04-14 00:00:00+09:00,4126,4130,4055,4129,4129,7459400,0,0
|
||||
2023-04-13 00:00:00+09:00,4064,4099,4026,4081,4081,5160200,0,0
|
||||
2023-04-12 00:00:00+09:00,3968,4084,3966,4064,4064,6372000,0,0
|
||||
2023-04-11 00:00:00+09:00,3990,4019,3954,3960,3960,6476500,0,0
|
||||
2023-04-10 00:00:00+09:00,3996,4009,3949,3964,3964,3485200,0,0
|
||||
2023-04-07 00:00:00+09:00,3897,3975,3892,3953,3953,4554700,0,0
|
||||
2023-04-06 00:00:00+09:00,4002,4004,3920,3942,3942,8615200,0,0
|
||||
2023-04-05 00:00:00+09:00,4150,4150,4080,4088,4088,6063700,0,0
|
||||
2023-04-04 00:00:00+09:00,4245,4245,4144,4155,4155,6780600,0,0
|
||||
2023-04-03 00:00:00+09:00,4250,4259,4162,4182,4182,7076800,0,0
|
||||
2023-03-31 00:00:00+09:00,4229,4299,4209,4275,4275,9608400,0,0
|
||||
2023-03-30 00:00:00+09:00,4257,4268,4119,4161,4161,5535200,55,5
|
||||
2023-03-29 00:00:00+09:00,4146,4211,4146,4206,4151,6514500,0,0
|
||||
2023-03-28 00:00:00+09:00,21000,21035,20620,20710,20439.185546875,901100,0,0
|
||||
2023-03-27 00:00:00+09:00,20980,21020,20755,20960,20685.916015625,1191900,0,0
|
||||
2023-03-24 00:00:00+09:00,20650,20935,20615,20885,20611.896484375,1792300,0,0
|
||||
2023-03-23 00:00:00+09:00,20280,20530,20195,20430,20162.845703125,1096000,0,0
|
||||
2023-03-22 00:00:00+09:00,20330,20640,20285,20610,20340.4921875,1748300,0,0
|
||||
2023-03-20 00:00:00+09:00,20000,20135,19900,19900,19639.77734375,1401300,0,0
|
||||
2023-03-17 00:00:00+09:00,20090,20275,20080,20155,19891.44140625,1392300,0,0
|
||||
2023-03-16 00:00:00+09:00,19880,20225,19860,20175,19911.181640625,1003800,0,0
|
||||
2023-03-15 00:00:00+09:00,20170,20250,20015,20205,19940.7890625,1224400,0,0
|
||||
|
30
tests/data/ALPHA-PA-bad-stock-split-fixed.csv
Normal file
30
tests/data/ALPHA-PA-bad-stock-split-fixed.csv
Normal file
@@ -0,0 +1,30 @@
|
||||
Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits
|
||||
2023-04-20 00:00:00+02:00,3,3,2,3,3,2076,0,0
|
||||
2023-04-21 00:00:00+02:00,3,3,2,3,3,2136,0,0
|
||||
2023-04-24 00:00:00+02:00,3,3,1,1,1,77147,0,0
|
||||
2023-04-25 00:00:00+02:00,1,2,1,2,2,9625,0,0
|
||||
2023-04-26 00:00:00+02:00,2,2,1,2,2,5028,0,0
|
||||
2023-04-27 00:00:00+02:00,2,2,1,1,1,3235,0,0
|
||||
2023-04-28 00:00:00+02:00,2,2,1,2,2,10944,0,0
|
||||
2023-05-02 00:00:00+02:00,2,2,2,2,2,12220,0,0
|
||||
2023-05-03 00:00:00+02:00,2,2,2,2,2,4683,0,0
|
||||
2023-05-04 00:00:00+02:00,2,2,1,2,2,3368,0,0
|
||||
2023-05-05 00:00:00+02:00,2,2,1,2,2,26069,0,0
|
||||
2023-05-08 00:00:00+02:00,1,2,1,1,1,70540,0,0
|
||||
2023-05-09 00:00:00+02:00,1,2,1,1,1,14228,0,0
|
||||
2023-05-10 00:00:00+02:00,1.08000004291534,1.39999997615814,0.879999995231628,1,1,81012,0,0.0001
|
||||
2023-05-11 00:00:00+02:00,1.03999996185303,1.03999996185303,0.850000023841858,1,1,40254,0,0
|
||||
2023-05-12 00:00:00+02:00,0.949999988079071,1.10000002384186,0.949999988079071,1.01999998092651,1.01999998092651,35026,0,0
|
||||
2023-05-15 00:00:00+02:00,0.949999988079071,1.01999998092651,0.860000014305115,0.939999997615814,0.939999997615814,41486,0,0
|
||||
2023-05-16 00:00:00+02:00,0.899999976158142,0.944000005722046,0.800000011920929,0.800000011920929,0.800000011920929,43583,0,0
|
||||
2023-05-17 00:00:00+02:00,0.850000023841858,0.850000023841858,0.779999971389771,0.810000002384186,0.810000002384186,29984,0,0
|
||||
2023-05-18 00:00:00+02:00,0.779999971389771,0.78600001335144,0.740000009536743,0.740000009536743,0.740000009536743,24679,0,0
|
||||
2023-05-19 00:00:00+02:00,0.78600001335144,0.78600001335144,0.649999976158142,0.65200001001358,0.65200001001358,26732,0,0
|
||||
2023-05-22 00:00:00+02:00,0.8299999833107,1.05999994277954,0.709999978542328,0.709999978542328,0.709999978542328,169538,0,0
|
||||
2023-05-23 00:00:00+02:00,0.899999976158142,1.60800004005432,0.860000014305115,1.22000002861023,1.22000002861023,858471,0,0
|
||||
2023-05-24 00:00:00+02:00,1.19400000572205,1.25999999046326,0.779999971389771,0.779999971389771,0.779999971389771,627823,0,0
|
||||
2023-05-25 00:00:00+02:00,0.980000019073486,1.22000002861023,0.702000021934509,0.732999980449677,0.732999980449677,1068939,0,0
|
||||
2023-05-26 00:00:00+02:00,0.660000026226044,0.72000002861023,0.602999985218048,0.611999988555908,0.611999988555908,631580,0,0
|
||||
2023-05-29 00:00:00+02:00,0.620000004768372,0.75,0.578999996185303,0.600000023841858,0.600000023841858,586150,0,0
|
||||
2023-05-30 00:00:00+02:00,0.610000014305115,0.634999990463257,0.497000008821487,0.497000008821487,0.497000008821487,552308,0,0
|
||||
2023-05-31 00:00:00+02:00,0.458999991416931,0.469999998807907,0.374000012874603,0.379999995231628,0.379999995231628,899067,0,0
|
||||
|
30
tests/data/ALPHA-PA-bad-stock-split.csv
Normal file
30
tests/data/ALPHA-PA-bad-stock-split.csv
Normal file
@@ -0,0 +1,30 @@
|
||||
Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits
|
||||
2023-04-20 00:00:00+02:00,3.0,3.0,2.0,3.0,3.0,2076,0.0,0.0
|
||||
2023-04-21 00:00:00+02:00,3.0,3.0,2.0,3.0,3.0,2136,0.0,0.0
|
||||
2023-04-24 00:00:00+02:00,3.0,3.0,1.0,1.0,1.0,77147,0.0,0.0
|
||||
2023-04-25 00:00:00+02:00,1.0,2.0,1.0,2.0,2.0,9625,0.0,0.0
|
||||
2023-04-26 00:00:00+02:00,2.0,2.0,1.0,2.0,2.0,5028,0.0,0.0
|
||||
2023-04-27 00:00:00+02:00,2.0,2.0,1.0,1.0,1.0,3235,0.0,0.0
|
||||
2023-04-28 00:00:00+02:00,2.0,2.0,1.0,2.0,2.0,10944,0.0,0.0
|
||||
2023-05-02 00:00:00+02:00,2.0,2.0,2.0,2.0,2.0,12220,0.0,0.0
|
||||
2023-05-03 00:00:00+02:00,2.0,2.0,2.0,2.0,2.0,4683,0.0,0.0
|
||||
2023-05-04 00:00:00+02:00,2.0,2.0,1.0,2.0,2.0,3368,0.0,0.0
|
||||
2023-05-05 00:00:00+02:00,2.0,2.0,1.0,2.0,2.0,26069,0.0,0.0
|
||||
2023-05-08 00:00:00+02:00,9.999999747378752e-05,0.00019999999494757503,9.999999747378752e-05,9.999999747378752e-05,9.999999747378752e-05,705399568,0.0,0.0
|
||||
2023-05-09 00:00:00+02:00,1.0,2.0,1.0,1.0,1.0,14228,0.0,0.0
|
||||
2023-05-10 00:00:00+02:00,1.0800000429153442,1.399999976158142,0.8799999952316284,1.0,1.0,81012,0.0,0.0001
|
||||
2023-05-11 00:00:00+02:00,1.0399999618530273,1.0399999618530273,0.8500000238418579,1.0,1.0,40254,0.0,0.0
|
||||
2023-05-12 00:00:00+02:00,0.949999988079071,1.100000023841858,0.949999988079071,1.0199999809265137,1.0199999809265137,35026,0.0,0.0
|
||||
2023-05-15 00:00:00+02:00,0.949999988079071,1.0199999809265137,0.8600000143051147,0.9399999976158142,0.9399999976158142,41486,0.0,0.0
|
||||
2023-05-16 00:00:00+02:00,0.8999999761581421,0.9440000057220459,0.800000011920929,0.800000011920929,0.800000011920929,43583,0.0,0.0
|
||||
2023-05-17 00:00:00+02:00,0.8500000238418579,0.8500000238418579,0.7799999713897705,0.8100000023841858,0.8100000023841858,29984,0.0,0.0
|
||||
2023-05-18 00:00:00+02:00,0.7799999713897705,0.7860000133514404,0.7400000095367432,0.7400000095367432,0.7400000095367432,24679,0.0,0.0
|
||||
2023-05-19 00:00:00+02:00,0.7860000133514404,0.7860000133514404,0.6499999761581421,0.6520000100135803,0.6520000100135803,26732,0.0,0.0
|
||||
2023-05-22 00:00:00+02:00,0.8299999833106995,1.059999942779541,0.7099999785423279,0.7099999785423279,0.7099999785423279,169538,0.0,0.0
|
||||
2023-05-23 00:00:00+02:00,0.8999999761581421,1.6080000400543213,0.8600000143051147,1.2200000286102295,1.2200000286102295,858471,0.0,0.0
|
||||
2023-05-24 00:00:00+02:00,1.194000005722046,1.2599999904632568,0.7799999713897705,0.7799999713897705,0.7799999713897705,627823,0.0,0.0
|
||||
2023-05-25 00:00:00+02:00,0.9800000190734863,1.2200000286102295,0.7020000219345093,0.7329999804496765,0.7329999804496765,1068939,0.0,0.0
|
||||
2023-05-26 00:00:00+02:00,0.6600000262260437,0.7200000286102295,0.6029999852180481,0.6119999885559082,0.6119999885559082,631580,0.0,0.0
|
||||
2023-05-29 00:00:00+02:00,0.6200000047683716,0.75,0.5789999961853027,0.6000000238418579,0.6000000238418579,586150,0.0,0.0
|
||||
2023-05-30 00:00:00+02:00,0.6100000143051147,0.6349999904632568,0.4970000088214874,0.4970000088214874,0.4970000088214874,552308,0.0,0.0
|
||||
2023-05-31 00:00:00+02:00,0.45899999141693115,0.4699999988079071,0.37400001287460327,0.3799999952316284,0.3799999952316284,899067,0.0,0.0
|
||||
|
11
tests/data/CNE-L-bad-stock-split-fixed.csv
Normal file
11
tests/data/CNE-L-bad-stock-split-fixed.csv
Normal file
@@ -0,0 +1,11 @@
|
||||
Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits
|
||||
2023-05-18 00:00:00+01:00,193.220001220703,200.839996337891,193.220001220703,196.839996337891,196.839996337891,653125,0,0
|
||||
2023-05-17 00:00:00+01:00,199.740005493164,207.738006591797,190.121994018555,197.860000610352,197.860000610352,822268,0,0
|
||||
2023-05-16 00:00:00+01:00,215.600006103516,215.600006103516,201.149993896484,205.100006103516,205.100006103516,451009,243.93939,0.471428571428571
|
||||
2023-05-15 00:00:00+01:00,215.399955531529,219.19995640346,210.599967302595,217.399987792969,102.39998147147,1761679.3939394,0,0
|
||||
2023-05-12 00:00:00+01:00,214.599988664899,216.199965558733,209.599965558733,211.399977329799,99.573855808803,1522298.48484849,0,0
|
||||
2023-05-11 00:00:00+01:00,219.999966430664,219.999966430664,212.199987357003,215.000000871931,101.269541277204,3568042.12121213,0,0
|
||||
2023-05-10 00:00:00+01:00,218.199954659598,223.000000435965,212.59995640346,215.399955531529,101.457929992676,5599908.78787879,0,0
|
||||
2023-05-09 00:00:00+01:00,224,227.688003540039,218.199996948242,218.399993896484,102.87100982666,1906090,0,0
|
||||
2023-05-05 00:00:00+01:00,220.999968174526,225.19996686663,220.799976457868,224.4,105.697140066964,964523.636363637,0,0
|
||||
2023-05-04 00:00:00+01:00,216.999989972796,222.799965558733,216.881988961356,221.399965994698,104.284055655343,880983.93939394,0,0
|
||||
|
11
tests/data/CNE-L-bad-stock-split.csv
Normal file
11
tests/data/CNE-L-bad-stock-split.csv
Normal file
@@ -0,0 +1,11 @@
|
||||
Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits
|
||||
2023-05-18 00:00:00+01:00,193.220001220703,200.839996337891,193.220001220703,196.839996337891,196.839996337891,653125,0,0
|
||||
2023-05-17 00:00:00+01:00,199.740005493164,207.738006591797,190.121994018555,197.860000610352,197.860000610352,822268,0,0
|
||||
2023-05-16 00:00:00+01:00,215.600006103516,215.600006103516,201.149993896484,205.100006103516,205.100006103516,451009,243.93939,0.471428571428571
|
||||
2023-05-15 00:00:00+01:00,456.908996582031,464.969604492188,446.727203369141,461.151489257813,217.21208190918,830506,0,0
|
||||
2023-05-12 00:00:00+01:00,455.212097167969,458.605987548828,444.605987548828,448.424194335938,211.217269897461,717655,0,0
|
||||
2023-05-11 00:00:00+01:00,466.666595458984,466.666595458984,450.121185302734,456.060607910156,214.814178466797,1682077,0,0
|
||||
2023-05-10 00:00:00+01:00,462.848388671875,473.030303955078,450.969604492188,456.908996582031,215.213790893555,2639957,0,0
|
||||
2023-05-09 00:00:00+01:00,224,227.688003540039,218.199996948242,218.399993896484,102.87100982666,1906090,0,0
|
||||
2023-05-05 00:00:00+01:00,468.787811279297,477.696899414063,468.363586425781,476,224.2060546875,454704,0,0
|
||||
2023-05-04 00:00:00+01:00,460.303009033203,472.605987548828,460.052703857422,469.636291503906,221.208602905273,415321,0,0
|
||||
|
24
tests/data/DEX-AX-bad-stock-split-fixed.csv
Normal file
24
tests/data/DEX-AX-bad-stock-split-fixed.csv
Normal file
@@ -0,0 +1,24 @@
|
||||
Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits
|
||||
2023-05-31 00:00:00+10:00,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0,0,0
|
||||
2023-05-30 00:00:00+10:00,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0,0,0.4406
|
||||
2023-05-29 00:00:00+10:00,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0,0,0
|
||||
2023-05-26 00:00:00+10:00,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0,0,0
|
||||
2023-05-25 00:00:00+10:00,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0,0,0
|
||||
2023-05-24 00:00:00+10:00,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0,0,0
|
||||
2023-05-23 00:00:00+10:00,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0,0,0
|
||||
2023-05-22 00:00:00+10:00,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0,0,0
|
||||
2023-05-19 00:00:00+10:00,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0,0,0
|
||||
2023-05-18 00:00:00+10:00,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0,0,0
|
||||
2023-05-17 00:00:00+10:00,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0,0,0
|
||||
2023-05-16 00:00:00+10:00,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0,0,0
|
||||
2023-05-15 00:00:00+10:00,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0,0,0
|
||||
2023-05-12 00:00:00+10:00,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0,0,0
|
||||
2023-05-11 00:00:00+10:00,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0,0,0
|
||||
2023-05-10 00:00:00+10:00,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0,0,0
|
||||
2023-05-09 00:00:00+10:00,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0,0,0
|
||||
2023-05-08 00:00:00+10:00,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0,0,0
|
||||
2023-05-05 00:00:00+10:00,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0,0,0
|
||||
2023-05-04 00:00:00+10:00,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0,0,0
|
||||
2023-05-03 00:00:00+10:00,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0,0,0
|
||||
2023-05-02 00:00:00+10:00,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0,0,0
|
||||
2023-05-01 00:00:00+10:00,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0,0,0
|
||||
|
24
tests/data/DEX-AX-bad-stock-split.csv
Normal file
24
tests/data/DEX-AX-bad-stock-split.csv
Normal file
@@ -0,0 +1,24 @@
|
||||
Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits
|
||||
2023-05-31 00:00:00+10:00,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0,0,0
|
||||
2023-05-30 00:00:00+10:00,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0,0,0.4406
|
||||
2023-05-29 00:00:00+10:00,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0,0,0
|
||||
2023-05-26 00:00:00+10:00,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0,0,0
|
||||
2023-05-25 00:00:00+10:00,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0,0,0
|
||||
2023-05-24 00:00:00+10:00,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0,0,0
|
||||
2023-05-23 00:00:00+10:00,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0,0,0
|
||||
2023-05-22 00:00:00+10:00,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0.120290003716946,0,0,0
|
||||
2023-05-19 00:00:00+10:00,0.0529999993741512,0.0529999993741512,0.0529999993741512,0.0529999993741512,0.0529999993741512,0,0,0
|
||||
2023-05-18 00:00:00+10:00,0.0529999993741512,0.0529999993741512,0.0529999993741512,0.0529999993741512,0.0529999993741512,0,0,0
|
||||
2023-05-17 00:00:00+10:00,0.0529999993741512,0.0529999993741512,0.0529999993741512,0.0529999993741512,0.0529999993741512,0,0,0
|
||||
2023-05-16 00:00:00+10:00,0.0529999993741512,0.0529999993741512,0.0529999993741512,0.0529999993741512,0.0529999993741512,0,0,0
|
||||
2023-05-15 00:00:00+10:00,0.0529999993741512,0.0529999993741512,0.0529999993741512,0.0529999993741512,0.0529999993741512,0,0,0
|
||||
2023-05-12 00:00:00+10:00,0.0529999993741512,0.0529999993741512,0.0529999993741512,0.0529999993741512,0.0529999993741512,0,0,0
|
||||
2023-05-11 00:00:00+10:00,0.0529999993741512,0.0529999993741512,0.0529999993741512,0.0529999993741512,0.0529999993741512,0,0,0
|
||||
2023-05-10 00:00:00+10:00,0.0529999993741512,0.0529999993741512,0.0529999993741512,0.0529999993741512,0.0529999993741512,0,0,0
|
||||
2023-05-09 00:00:00+10:00,0.0529999993741512,0.0529999993741512,0.0529999993741512,0.0529999993741512,0.0529999993741512,0,0,0
|
||||
2023-05-08 00:00:00+10:00,0.0529999993741512,0.0529999993741512,0.0529999993741512,0.0529999993741512,0.0529999993741512,0,0,0
|
||||
2023-05-05 00:00:00+10:00,0.0529999993741512,0.0529999993741512,0.0529999993741512,0.0529999993741512,0.0529999993741512,0,0,0
|
||||
2023-05-04 00:00:00+10:00,0.0529999993741512,0.0529999993741512,0.0529999993741512,0.0529999993741512,0.0529999993741512,0,0,0
|
||||
2023-05-03 00:00:00+10:00,0.0529999993741512,0.0529999993741512,0.0529999993741512,0.0529999993741512,0.0529999993741512,0,0,0
|
||||
2023-05-02 00:00:00+10:00,0.0529999993741512,0.0529999993741512,0.0529999993741512,0.0529999993741512,0.0529999993741512,0,0,0
|
||||
2023-05-01 00:00:00+10:00,0.0529999993741512,0.0529999993741512,0.0529999993741512,0.0529999993741512,0.0529999993741512,0,0,0
|
||||
|
17
tests/data/MOB-ST-bad-stock-split-fixed.csv
Normal file
17
tests/data/MOB-ST-bad-stock-split-fixed.csv
Normal file
@@ -0,0 +1,17 @@
|
||||
Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits
|
||||
2023-05-08 00:00:00+02:00,24.8999996185303,24.9500007629395,24.1000003814697,24.75,24.75,7187,0,0
|
||||
2023-05-09 00:00:00+02:00,25,25.5,23.1499996185303,24.1499996185303,24.1499996185303,22753,0,0
|
||||
2023-05-10 00:00:00+02:00,24.1499996185303,24.1499996185303,22,22.9500007629395,22.9500007629395,62727,0,0
|
||||
2023-05-11 00:00:00+02:00,22.9500007629395,25,22.9500007629395,23.3500003814697,23.3500003814697,19550,0,0
|
||||
2023-05-12 00:00:00+02:00,23.3500003814697,24,22.1000003814697,23.8500003814697,23.8500003814697,17143,0,0
|
||||
2023-05-15 00:00:00+02:00,23,25.7999992370605,22.5,23,23,43709,0,0
|
||||
2023-05-16 00:00:00+02:00,22.75,24.0499992370605,22.5,22.75,22.75,16068,0,0
|
||||
2023-05-17 00:00:00+02:00,23,23.8500003814697,22.1000003814697,23.6499996185303,23.6499996185303,19926,0,0
|
||||
2023-05-19 00:00:00+02:00,23.6499996185303,23.8500003814697,22.1000003814697,22.2999992370605,22.2999992370605,41050,0,0
|
||||
2023-05-22 00:00:00+02:00,22.0000004768372,24.1499996185303,21.5499997138977,22.7500009536743,22.7500009536743,34022,0,0
|
||||
2023-05-23 00:00:00+02:00,22.75,22.8999996185303,21.75,22.5,22.5,13992,0,0
|
||||
2023-05-24 00:00:00+02:00,21,24,21,22.0100002288818,22.0100002288818,18306,0,0.1
|
||||
2023-05-25 00:00:00+02:00,21.5699996948242,22.8899993896484,20,21.1599998474121,21.1599998474121,35398,0,0
|
||||
2023-05-26 00:00:00+02:00,21.1599998474121,22.4950008392334,20.5,21.0949993133545,21.0949993133545,8039,0,0
|
||||
2023-05-29 00:00:00+02:00,22.1000003814697,22.1000003814697,20.25,20.75,20.75,17786,0,0
|
||||
2023-05-30 00:00:00+02:00,20.75,21.6499996185303,20.1499996185303,20.4500007629395,20.4500007629395,10709,0,0
|
||||
|
17
tests/data/MOB-ST-bad-stock-split.csv
Normal file
17
tests/data/MOB-ST-bad-stock-split.csv
Normal file
@@ -0,0 +1,17 @@
|
||||
Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits
|
||||
2023-05-08 00:00:00+02:00,24.899999618530273,24.950000762939453,24.100000381469727,24.75,24.75,7187,0.0,0.0
|
||||
2023-05-09 00:00:00+02:00,25.0,25.5,23.149999618530273,24.149999618530273,24.149999618530273,22753,0.0,0.0
|
||||
2023-05-10 00:00:00+02:00,24.149999618530273,24.149999618530273,22.0,22.950000762939453,22.950000762939453,62727,0.0,0.0
|
||||
2023-05-11 00:00:00+02:00,22.950000762939453,25.0,22.950000762939453,23.350000381469727,23.350000381469727,19550,0.0,0.0
|
||||
2023-05-12 00:00:00+02:00,23.350000381469727,24.0,22.100000381469727,23.850000381469727,23.850000381469727,17143,0.0,0.0
|
||||
2023-05-15 00:00:00+02:00,23.0,25.799999237060547,22.5,23.0,23.0,43709,0.0,0.0
|
||||
2023-05-16 00:00:00+02:00,22.75,24.049999237060547,22.5,22.75,22.75,16068,0.0,0.0
|
||||
2023-05-17 00:00:00+02:00,23.0,23.850000381469727,22.100000381469727,23.649999618530273,23.649999618530273,19926,0.0,0.0
|
||||
2023-05-19 00:00:00+02:00,23.649999618530273,23.850000381469727,22.100000381469727,22.299999237060547,22.299999237060547,41050,0.0,0.0
|
||||
2023-05-22 00:00:00+02:00,2.200000047683716,2.4149999618530273,2.1549999713897705,2.2750000953674316,2.2750000953674316,340215,0.0,0.0
|
||||
2023-05-23 00:00:00+02:00,22.75,22.899999618530273,21.75,22.5,22.5,13992,0.0,0.0
|
||||
2023-05-24 00:00:00+02:00,21.0,24.0,21.0,22.010000228881836,22.010000228881836,18306,0.0,0.1
|
||||
2023-05-25 00:00:00+02:00,21.56999969482422,22.889999389648438,20.0,21.15999984741211,21.15999984741211,35398,0.0,0.0
|
||||
2023-05-26 00:00:00+02:00,21.15999984741211,22.4950008392334,20.5,21.094999313354492,21.094999313354492,8039,0.0,0.0
|
||||
2023-05-29 00:00:00+02:00,22.100000381469727,22.100000381469727,20.25,20.75,20.75,17786,0.0,0.0
|
||||
2023-05-30 00:00:00+02:00,20.75,21.649999618530273,20.149999618530273,20.450000762939453,20.450000762939453,10709,0.0,0.0
|
||||
|
23
tests/data/SPM-MI-bad-stock-split-fixed.csv
Normal file
23
tests/data/SPM-MI-bad-stock-split-fixed.csv
Normal file
@@ -0,0 +1,23 @@
|
||||
Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits
|
||||
2022-06-01 00:00:00+02:00,5.72999992370606,5.78199996948242,5.3939998626709,5.3939998626709,5.3939998626709,3095860,0,0
|
||||
2022-06-02 00:00:00+02:00,5.38600006103516,5.38600006103516,5.26800003051758,5.2939998626709,5.2939998626709,1662880,0,0
|
||||
2022-06-03 00:00:00+02:00,5.34599990844727,5.34599990844727,5.15800018310547,5.16800003051758,5.16800003051758,1698900,0,0
|
||||
2022-06-06 00:00:00+02:00,5.16800003051758,5.25200004577637,5.13800010681152,5.18800010681152,5.18800010681152,1074910,0,0
|
||||
2022-06-07 00:00:00+02:00,5.21800003051758,5.22200012207031,5.07400016784668,5.1560001373291,5.1560001373291,1850680,0,0
|
||||
2022-06-08 00:00:00+02:00,5.1560001373291,5.17599983215332,5.07200012207031,5.10200004577637,5.10200004577637,1140360,0,0
|
||||
2022-06-09 00:00:00+02:00,5.09799995422363,5.09799995422363,4.87599983215332,4.8939998626709,4.8939998626709,2025480,0,0
|
||||
2022-06-10 00:00:00+02:00,4.87999992370606,4.87999992370606,4.50400009155274,4.50400009155274,4.50400009155274,2982730,0,0
|
||||
2022-06-13 00:00:00+02:00,4.3,4.37599983215332,3.83600006103516,3.83600006103516,3.83600006103516,4568210,0,0.1
|
||||
2022-06-14 00:00:00+02:00,3.87750015258789,4.15999984741211,3.85200004577637,3.9439998626709,3.9439998626709,5354500,0,0
|
||||
2022-06-15 00:00:00+02:00,4.03400001525879,4.16450004577637,3.73050003051758,3.73050003051758,3.73050003051758,6662610,0,0
|
||||
2022-06-16 00:00:00+02:00,3.73050003051758,3.98499984741211,3.72400016784668,3.82550010681152,3.82550010681152,13379960,0,0
|
||||
2022-06-17 00:00:00+02:00,3.8,4.29949989318848,3.75,4.29949989318848,4.29949989318848,12844160,0,0
|
||||
2022-06-20 00:00:00+02:00,2.19422197341919,2.2295401096344,2.13992595672607,2.2295401096344,2.2295401096344,12364104,0,0
|
||||
2022-06-21 00:00:00+02:00,2.24719905853272,2.28515291213989,2.19712090492249,2.21557092666626,2.21557092666626,8434013,0,0
|
||||
2022-06-22 00:00:00+02:00,1.98679196834564,2.00365996360779,1.73798203468323,1.73798203468323,1.73798203468323,26496542,0,0
|
||||
2022-06-23 00:00:00+02:00,1.62411904335022,1.68526804447174,1.37320005893707,1.59776198863983,1.59776198863983,48720201,0,0
|
||||
2022-06-24 00:00:00+02:00,1.47599303722382,1.54610300064087,1.1739410161972,1.24932205677032,1.24932205677032,56877192,0,0
|
||||
2022-06-27 00:00:00+02:00,1.49899995326996,1.79849994182587,1.49899995326996,1.79849994182587,1.79849994182587,460673,0,0
|
||||
2022-06-28 00:00:00+02:00,2.15799999237061,3.05100011825562,2.12599992752075,3.05100011825562,3.05100011825562,3058635,0,0
|
||||
2022-06-29 00:00:00+02:00,2.90000009536743,3.73799991607666,2.85899996757507,3.26399993896484,3.26399993896484,6516761,0,0
|
||||
2022-06-30 00:00:00+02:00,3.24900007247925,3.28099989891052,2.5,2.5550000667572,2.5550000667572,4805984,0,0
|
||||
|
23
tests/data/SPM-MI-bad-stock-split.csv
Normal file
23
tests/data/SPM-MI-bad-stock-split.csv
Normal file
@@ -0,0 +1,23 @@
|
||||
Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits
|
||||
2022-06-01 00:00:00+02:00,57.29999923706055,57.81999969482422,53.939998626708984,53.939998626708984,53.939998626708984,309586,0.0,0.0
|
||||
2022-06-02 00:00:00+02:00,53.86000061035156,53.86000061035156,52.68000030517578,52.939998626708984,52.939998626708984,166288,0.0,0.0
|
||||
2022-06-03 00:00:00+02:00,53.459999084472656,53.459999084472656,51.58000183105469,51.68000030517578,51.68000030517578,169890,0.0,0.0
|
||||
2022-06-06 00:00:00+02:00,51.68000030517578,52.52000045776367,51.380001068115234,51.880001068115234,51.880001068115234,107491,0.0,0.0
|
||||
2022-06-07 00:00:00+02:00,52.18000030517578,52.220001220703125,50.7400016784668,51.560001373291016,51.560001373291016,185068,0.0,0.0
|
||||
2022-06-08 00:00:00+02:00,51.560001373291016,51.7599983215332,50.720001220703125,51.02000045776367,51.02000045776367,114036,0.0,0.0
|
||||
2022-06-09 00:00:00+02:00,50.97999954223633,50.97999954223633,48.7599983215332,48.939998626708984,48.939998626708984,202548,0.0,0.0
|
||||
2022-06-10 00:00:00+02:00,48.79999923706055,48.79999923706055,45.040000915527344,45.040000915527344,45.040000915527344,298273,0.0,0.0
|
||||
2022-06-13 00:00:00+02:00,43.0,43.7599983215332,38.36000061035156,38.36000061035156,38.36000061035156,456821,0.0,0.1
|
||||
2022-06-14 00:00:00+02:00,38.775001525878906,41.599998474121094,38.52000045776367,39.439998626708984,39.439998626708984,535450,0.0,0.0
|
||||
2022-06-15 00:00:00+02:00,40.34000015258789,41.64500045776367,37.30500030517578,37.30500030517578,37.30500030517578,666261,0.0,0.0
|
||||
2022-06-16 00:00:00+02:00,37.30500030517578,39.849998474121094,37.2400016784668,38.255001068115234,38.255001068115234,1337996,0.0,0.0
|
||||
2022-06-17 00:00:00+02:00,38.0,42.994998931884766,37.5,42.994998931884766,42.994998931884766,1284416,0.0,0.0
|
||||
2022-06-20 00:00:00+02:00,2.1942219734191895,2.2295401096343994,2.139925956726074,2.2295401096343994,2.2295401096343994,12364104,0.0,0.0
|
||||
2022-06-21 00:00:00+02:00,2.247199058532715,2.2851529121398926,2.1971209049224854,2.2155709266662598,2.2155709266662598,8434013,0.0,0.0
|
||||
2022-06-22 00:00:00+02:00,1.986791968345642,2.003659963607788,1.7379820346832275,1.7379820346832275,1.7379820346832275,26496542,0.0,0.0
|
||||
2022-06-23 00:00:00+02:00,1.6241190433502197,1.6852680444717407,1.3732000589370728,1.5977619886398315,1.5977619886398315,48720201,0.0,0.0
|
||||
2022-06-24 00:00:00+02:00,1.475993037223816,1.5461030006408691,1.1739410161972046,1.2493220567703247,1.2493220567703247,56877192,0.0,0.0
|
||||
2022-06-27 00:00:00+02:00,1.4989999532699585,1.7984999418258667,1.4989999532699585,1.7984999418258667,1.7984999418258667,460673,0.0,0.0
|
||||
2022-06-28 00:00:00+02:00,2.1579999923706055,3.0510001182556152,2.125999927520752,3.0510001182556152,3.0510001182556152,3058635,0.0,0.0
|
||||
2022-06-29 00:00:00+02:00,2.9000000953674316,3.73799991607666,2.8589999675750732,3.2639999389648438,3.2639999389648438,6516761,0.0,0.0
|
||||
2022-06-30 00:00:00+02:00,3.249000072479248,3.2809998989105225,2.5,2.555000066757202,2.555000066757202,4805984,0.0,0.0
|
||||
|
30
tests/data/SSW-JO-100x-error.csv
Normal file
30
tests/data/SSW-JO-100x-error.csv
Normal file
@@ -0,0 +1,30 @@
|
||||
Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits
|
||||
2023-06-09 00:00:00+02:00,34.700001,34.709999,33.240002,33.619999,33.619999,7148409,0,0
|
||||
2023-06-08 00:00:00+02:00,34.900002,34.990002,34.040001,34.360001,34.360001,10406999,0,0
|
||||
2023-06-07 00:00:00+02:00,34.549999,35.639999,34.320000,35.090000,35.090000,10118918,0,0
|
||||
2023-06-06 00:00:00+02:00,34.500000,34.820000,34.049999,34.459999,34.459999,9109709,0,0
|
||||
2023-06-05 00:00:00+02:00,35.000000,35.299999,34.200001,34.700001,34.700001,8791993,0,0
|
||||
2023-06-02 00:00:00+02:00,35.689999,36.180000,34.599998,34.970001,34.970001,8844549,0,0
|
||||
2023-06-01 00:00:00+02:00,35.230000,35.380001,34.240002,35.349998,35.349998,6721030,0,0
|
||||
2023-05-31 00:00:00+02:00,3480,3548,3426,3501,3501,32605833,0,0
|
||||
2023-05-30 00:00:00+02:00,3439,3537,3385,3423,3423,8970804,0,0
|
||||
2023-05-29 00:00:00+02:00,3466,3506,3402,3432,3432,3912803,0,0
|
||||
2023-05-26 00:00:00+02:00,3475,3599,3433,3453,3453,6744718,0,0
|
||||
2023-05-25 00:00:00+02:00,3540,3609,3463,3507,3507,16900221,0,0
|
||||
2023-05-24 00:00:00+02:00,3620,3650,3526,3540,3540,9049505,0,0
|
||||
2023-05-23 00:00:00+02:00,3690,3667,3556,3610,3610,10797373,0,0
|
||||
2023-05-22 00:00:00+02:00,3705,3736,3609,3661,3661,7132641,0,0
|
||||
2023-05-19 00:00:00+02:00,3620,3715,3625,3690,3690,12648518,0,0
|
||||
2023-05-18 00:00:00+02:00,3657,3699,3584,3646,3646,10674542,0,0
|
||||
2023-05-17 00:00:00+02:00,3687,3731,3656,3671,3671,9892791,0,0
|
||||
2023-05-16 00:00:00+02:00,3715,3773,3696,3703,3703,4706789,0,0
|
||||
2023-05-15 00:00:00+02:00,3774,3805,3696,3727,3727,7890969,0,0
|
||||
2023-05-12 00:00:00+02:00,3750,3844,3671,3774,3774,8724303,0,0
|
||||
2023-05-11 00:00:00+02:00,3880,3888,3701,3732,3732,14371855,0,0
|
||||
2023-05-10 00:00:00+02:00,3893,3880,3642,3810,3810,30393389,0,0
|
||||
2023-05-09 00:00:00+02:00,4441,4441,3939,3966,3966,19833428,0,0
|
||||
2023-05-08 00:00:00+02:00,4463,4578,4456,4471,4471,11092519,0,0
|
||||
2023-05-05 00:00:00+02:00,4299,4490,4287,4458,4458,28539048,0,0
|
||||
2023-05-04 00:00:00+02:00,4149,4330,4123,4283,4283,15506868,0,0
|
||||
2023-05-03 00:00:00+02:00,3975,4098,3968,4095,4095,14657028,0,0
|
||||
2023-05-02 00:00:00+02:00,4037,4032,3917,3965,3965,11818133,0,0
|
||||
|
283
tests/prices.py
283
tests/prices.py
@@ -3,6 +3,7 @@ from .context import session_gbl
|
||||
|
||||
import unittest
|
||||
|
||||
import os
|
||||
import datetime as _dt
|
||||
import pytz as _tz
|
||||
import numpy as _np
|
||||
@@ -114,6 +115,32 @@ class TestPriceHistory(unittest.TestCase):
|
||||
self.skipTest("Skipping test_duplicatingWeekly() because not possible to fail Monday/weekend")
|
||||
|
||||
def test_intraDayWithEvents(self):
    """Check that 15-minute history carries the same dividends as daily history.

    For each candidate ticker the daily history of the last 59 days is
    fetched and its non-zero dividends extracted. If there are any, the
    15-minute history for the day of the most recent dividend is fetched,
    its non-zero dividends are floored to day resolution and compared with
    the daily ones. The test is skipped if no ticker had a dividend.
    """
    # NOTE(review): a method with this exact name appears a second time
    # further down this file (apparently in the same class); if so, only
    # the later definition is run by unittest - confirm and drop one copy.
    candidates = ["BHP.AX", "IMP.JO", "BP.L", "PNL.L", "INTC"]
    checked_any = False
    for tkr in candidates:
        # Daily data: open-ended window starting 59 days ago.
        daily = yf.Ticker(tkr, session=self.session).history(
            start=_dt.date.today() - _dt.timedelta(days=59),
            end=None,
            interval="1d",
            actions=True)
        daily_divs = daily["Dividends"]
        daily_divs = daily_divs[daily_divs != 0]
        if daily_divs.shape[0] == 0:
            # No dividend in the window - try the next ticker.
            continue

        # Intraday data: only the day of the most recent dividend.
        div_day = daily_divs.index[-1].date()
        intraday = yf.Ticker(tkr, session=self.session).history(
            start=div_day,
            end=div_day + _dt.timedelta(days=1),
            interval="15m",
            actions=True)
        self.assertTrue((intraday["Dividends"] != 0.0).any())

        intraday_divs = intraday["Dividends"][intraday["Dividends"] != 0]
        # Drop the time-of-day so the index is comparable with daily data.
        intraday_divs.index = intraday_divs.index.floor('D')
        self.assertTrue(daily_divs.equals(intraday_divs))

        checked_any = True

    if not checked_any:
        self.skipTest("Skipping test_intraDayWithEvents() because no tickers had a dividend in last 60 days")
|
||||
|
||||
def test_intraDayWithEvents_tase(self):
|
||||
# TASE dividend release pre-market, doesn't merge nicely with intra-day data so check still present
|
||||
|
||||
tase_tkrs = ["ICL.TA", "ESLT.TA", "ONE.TA", "MGDL.TA"]
|
||||
@@ -124,21 +151,45 @@ class TestPriceHistory(unittest.TestCase):
|
||||
df_daily = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1d", actions=True)
|
||||
df_daily_divs = df_daily["Dividends"][df_daily["Dividends"] != 0]
|
||||
if df_daily_divs.shape[0] == 0:
|
||||
# self.skipTest("Skipping test_intraDayWithEvents() because 'ICL.TA' has no dividend in last 60 days")
|
||||
continue
|
||||
|
||||
last_div_date = df_daily_divs.index[-1]
|
||||
start_d = last_div_date.date()
|
||||
end_d = last_div_date.date() + _dt.timedelta(days=1)
|
||||
df = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="15m", actions=True)
|
||||
self.assertTrue((df["Dividends"] != 0.0).any())
|
||||
df_intraday = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="15m", actions=True)
|
||||
self.assertTrue((df_intraday["Dividends"] != 0.0).any())
|
||||
|
||||
df_intraday_divs = df_intraday["Dividends"][df_intraday["Dividends"] != 0]
|
||||
df_intraday_divs.index = df_intraday_divs.index.floor('D')
|
||||
self.assertTrue(df_daily_divs.equals(df_intraday_divs))
|
||||
|
||||
test_run = True
|
||||
break
|
||||
|
||||
if not test_run:
|
||||
self.skipTest("Skipping test_intraDayWithEvents() because no tickers had a dividend in last 60 days")
|
||||
self.skipTest("Skipping test_intraDayWithEvents_tase() because no tickers had a dividend in last 60 days")
|
||||
|
||||
def test_dailyWithEvents(self):
    """Check that daily history reports 2022 dividends on the expected dates.

    For each ticker, daily history between 2022-01-01 and 2023-01-01 is
    fetched, the rows with a non-zero dividend are selected (newest first)
    and their dates are compared with a hard-coded list.
    """
    start_d = _dt.date(2022, 1, 1)
    end_d = _dt.date(2023, 1, 1)

    # Expected dividend dates per ticker, newest first.
    tkr_div_dates = {
        'BHP.AX': [_dt.date(2022, 9, 1), _dt.date(2022, 2, 24)],  # Yahoo claims 23-Feb but wrong because DST
        'IMP.JO': [_dt.date(2022, 9, 21), _dt.date(2022, 3, 16)],
        'BP.L': [_dt.date(2022, 11, 10), _dt.date(2022, 8, 11), _dt.date(2022, 5, 12), _dt.date(2022, 2, 17)],
        'INTC': [_dt.date(2022, 11, 4), _dt.date(2022, 8, 4), _dt.date(2022, 5, 5), _dt.date(2022, 2, 4)],
    }

    for tkr, dates in tkr_div_dates.items():
        df = yf.Ticker(tkr, session=self.session).history(interval='1d', start=start_d, end=end_d)
        df_divs = df[df['Dividends'] != 0].sort_index(ascending=False)
        # Compare as plain lists: an element-wise array comparison is not
        # well-defined when the number of dividends differs from the
        # expectation, whereas assertEqual on lists fails cleanly and
        # prints both the response and the expected answer.
        response = list(df_divs.index.date)
        self.assertEqual(response, dates, f"ticker = {tkr}: dividend dates differ from expected")
|
||||
|
||||
def test_dailyWithEvents_bugs(self):
|
||||
# Reproduce issue #521
|
||||
tkr1 = "QQQ"
|
||||
tkr2 = "GDX"
|
||||
@@ -172,6 +223,60 @@ class TestPriceHistory(unittest.TestCase):
|
||||
print("{}-without-events missing these dates: {}".format(tkr, missing_from_df2))
|
||||
raise
|
||||
|
||||
def test_intraDayWithEvents(self):
    """Check that 15-minute history carries the same dividends as daily history.

    For each candidate ticker the daily history of the last 59 days is
    fetched and its non-zero dividends extracted. If there are any, the
    15-minute history for the day of the most recent dividend is fetched,
    its non-zero dividends are floored to day resolution and compared with
    the daily ones. The test is skipped if no ticker had a dividend.
    """
    # NOTE(review): a method with this exact name also appears earlier in
    # this file (apparently in the same class); if so, this later
    # definition is the one unittest runs - confirm and drop one copy.
    candidates = ["BHP.AX", "IMP.JO", "BP.L", "PNL.L", "INTC"]
    checked_any = False
    for tkr in candidates:
        # Daily data: open-ended window starting 59 days ago.
        daily = yf.Ticker(tkr, session=self.session).history(
            start=_dt.date.today() - _dt.timedelta(days=59),
            end=None,
            interval="1d",
            actions=True)
        daily_divs = daily["Dividends"]
        daily_divs = daily_divs[daily_divs != 0]
        if daily_divs.shape[0] == 0:
            # No dividend in the window - try the next ticker.
            continue

        # Intraday data: only the day of the most recent dividend.
        div_day = daily_divs.index[-1].date()
        intraday = yf.Ticker(tkr, session=self.session).history(
            start=div_day,
            end=div_day + _dt.timedelta(days=1),
            interval="15m",
            actions=True)
        self.assertTrue((intraday["Dividends"] != 0.0).any())

        intraday_divs = intraday["Dividends"][intraday["Dividends"] != 0]
        # Drop the time-of-day so the index is comparable with daily data.
        intraday_divs.index = intraday_divs.index.floor('D')
        self.assertTrue(daily_divs.equals(intraday_divs))

        checked_any = True

    if not checked_any:
        self.skipTest("Skipping test_intraDayWithEvents() because no tickers had a dividend in last 60 days")
|
||||
|
||||
def test_intraDayWithEvents_tase(self):
    """Check that 15-minute history of TASE tickers keeps its dividends.

    Same procedure as for the other exchanges: find the most recent
    dividend in the last 59 days of daily data, fetch 15-minute data for
    that day, and require that its non-zero dividends (floored to day
    resolution) equal the daily ones. Skipped if no ticker had a dividend.
    """
    # TASE dividend release pre-market, doesn't merge nicely with intra-day data so check still present
    # NOTE(review): a method with this name also appears earlier in this
    # file; if both are in the same class only this later one runs - confirm.

    tase_candidates = ["ICL.TA", "ESLT.TA", "ONE.TA", "MGDL.TA"]
    checked_any = False
    for tkr in tase_candidates:
        # Daily data: open-ended window starting 59 days ago.
        daily = yf.Ticker(tkr, session=self.session).history(
            start=_dt.date.today() - _dt.timedelta(days=59),
            end=None,
            interval="1d",
            actions=True)
        daily_divs = daily["Dividends"]
        daily_divs = daily_divs[daily_divs != 0]
        if daily_divs.shape[0] == 0:
            # No dividend in the window - try the next ticker.
            continue

        # Intraday data: only the day of the most recent dividend.
        div_day = daily_divs.index[-1].date()
        intraday = yf.Ticker(tkr, session=self.session).history(
            start=div_day,
            end=div_day + _dt.timedelta(days=1),
            interval="15m",
            actions=True)
        self.assertTrue((intraday["Dividends"] != 0.0).any())

        intraday_divs = intraday["Dividends"][intraday["Dividends"] != 0]
        # Drop the time-of-day so the index is comparable with daily data.
        intraday_divs.index = intraday_divs.index.floor('D')
        self.assertTrue(daily_divs.equals(intraday_divs))

        checked_any = True

    if not checked_any:
        self.skipTest("Skipping test_intraDayWithEvents_tase() because no tickers had a dividend in last 60 days")
|
||||
|
||||
def test_weeklyWithEvents(self):
|
||||
# Reproduce issue #521
|
||||
tkr1 = "QQQ"
|
||||
@@ -241,8 +346,19 @@ class TestPriceHistory(unittest.TestCase):
|
||||
|
||||
def test_monthlyWithEvents2(self):
|
||||
# Simply check no exception from internal merge
|
||||
tkr = "ABBV"
|
||||
yf.Ticker("ABBV").history(period="max", interval="1mo")
|
||||
dfm = yf.Ticker("ABBV").history(period="max", interval="1mo")
|
||||
dfd = yf.Ticker("ABBV").history(period="max", interval="1d")
|
||||
dfd = dfd[dfd.index > dfm.index[0]]
|
||||
dfm_divs = dfm[dfm['Dividends']!=0]
|
||||
dfd_divs = dfd[dfd['Dividends']!=0]
|
||||
self.assertEqual(dfm_divs.shape[0], dfd_divs.shape[0])
|
||||
|
||||
dfm = yf.Ticker("F").history(period="50mo",interval="1mo")
|
||||
dfd = yf.Ticker("F").history(period="50mo", interval="1d")
|
||||
dfd = dfd[dfd.index > dfm.index[0]]
|
||||
dfm_divs = dfm[dfm['Dividends']!=0]
|
||||
dfd_divs = dfd[dfd['Dividends']!=0]
|
||||
self.assertEqual(dfm_divs.shape[0], dfd_divs.shape[0])
|
||||
|
||||
def test_tz_dst_ambiguous(self):
|
||||
# Reproduce issue #1100
|
||||
@@ -437,7 +553,7 @@ class TestPriceRepair(unittest.TestCase):
|
||||
start_dt = end_dt - td_60d
|
||||
df = dat.history(start=start_dt, end=end_dt, interval="2m", repair=True)
|
||||
|
||||
def test_repair_100x_weekly(self):
|
||||
def test_repair_100x_random_weekly(self):
|
||||
# Setup:
|
||||
tkr = "PNL.L"
|
||||
dat = yf.Ticker(tkr, session=self.session)
|
||||
@@ -465,7 +581,7 @@ class TestPriceRepair(unittest.TestCase):
|
||||
|
||||
# Run test
|
||||
|
||||
df_repaired = dat._fix_unit_mixups(df_bad, "1wk", tz_exchange, prepost=False)
|
||||
df_repaired = dat._fix_unit_random_mixups(df_bad, "1wk", tz_exchange, prepost=False, silent=True)
|
||||
|
||||
# First test - no errors left
|
||||
for c in data_cols:
|
||||
@@ -491,7 +607,7 @@ class TestPriceRepair(unittest.TestCase):
|
||||
self.assertTrue("Repaired?" in df_repaired.columns)
|
||||
self.assertFalse(df_repaired["Repaired?"].isna().any())
|
||||
|
||||
def test_repair_100x_weekly_preSplit(self):
|
||||
def test_repair_100x_random_weekly_preSplit(self):
|
||||
# PNL.L has a stock-split in 2022. Sometimes requesting data before 2022 is not split-adjusted.
|
||||
|
||||
tkr = "PNL.L"
|
||||
@@ -523,7 +639,7 @@ class TestPriceRepair(unittest.TestCase):
|
||||
df.index = df.index.tz_localize(tz_exchange)
|
||||
df_bad.index = df_bad.index.tz_localize(tz_exchange)
|
||||
|
||||
df_repaired = dat._fix_unit_mixups(df_bad, "1wk", tz_exchange, prepost=False)
|
||||
df_repaired = dat._fix_unit_random_mixups(df_bad, "1wk", tz_exchange, prepost=False, silent=True)
|
||||
|
||||
# First test - no errors left
|
||||
for c in data_cols:
|
||||
@@ -551,7 +667,7 @@ class TestPriceRepair(unittest.TestCase):
|
||||
self.assertTrue("Repaired?" in df_repaired.columns)
|
||||
self.assertFalse(df_repaired["Repaired?"].isna().any())
|
||||
|
||||
def test_repair_100x_daily(self):
|
||||
def test_repair_100x_random_daily(self):
|
||||
tkr = "PNL.L"
|
||||
dat = yf.Ticker(tkr, session=self.session)
|
||||
tz_exchange = dat.fast_info["timezone"]
|
||||
@@ -576,7 +692,7 @@ class TestPriceRepair(unittest.TestCase):
|
||||
df.index = df.index.tz_localize(tz_exchange)
|
||||
df_bad.index = df_bad.index.tz_localize(tz_exchange)
|
||||
|
||||
df_repaired = dat._fix_unit_mixups(df_bad, "1d", tz_exchange, prepost=False)
|
||||
df_repaired = dat._fix_unit_random_mixups(df_bad, "1d", tz_exchange, prepost=False, silent=True)
|
||||
|
||||
# First test - no errors left
|
||||
for c in data_cols:
|
||||
@@ -596,6 +712,51 @@ class TestPriceRepair(unittest.TestCase):
|
||||
self.assertTrue("Repaired?" in df_repaired.columns)
|
||||
self.assertFalse(df_repaired["Repaired?"].isna().any())
|
||||
|
||||
def test_repair_100x_block_daily(self):
    """Check repair of a contiguous block of 100x price errors in daily data.

    Some 100x errors are not sporadic: sometimes Yahoo suddenly shifts from
    cents->$ from some recent date. The fixture CSV for SSW.JO contains
    such a switch; the expected frame is built by scaling every price
    column by 0.01 for rows up to and including 2023-05-31.
    """
    tkr = "SSW.JO"
    dat = yf.Ticker(tkr, session=self.session)
    tz_exchange = dat.fast_info["timezone"]

    data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
    _dp = os.path.dirname(__file__)
    df_bad = _pd.read_csv(os.path.join(_dp, "data", tkr.replace('.','-')+"-100x-error.csv"), index_col="Date")
    df_bad.index = _pd.to_datetime(df_bad.index)
    df_bad = df_bad.sort_index()

    # Build the expected ("correct") frame from the bad one.
    df = df_bad.copy()
    for d in data_cols:
        df.loc[:'2023-05-31', d] *= 0.01  # fix error

    df_repaired = dat._fix_unit_switch(df_bad, "1d", tz_exchange)
    df_repaired = df_repaired.sort_index()

    # First test - no errors left
    for c in data_cols:
        try:
            self.assertTrue(_np.isclose(df_repaired[c], df[c], rtol=1e-2).all())
        except Exception:
            # Dump both series to help diagnose, then let the failure propagate.
            print(df_repaired[c])
            print(df[c])
            print(f"TEST FAIL on column '{c}'")
            raise

    # Second test - all differences should be either ~1x or ~100x
    ratio = df_bad[data_cols].values / df[data_cols].values
    ratio = ratio.round(2)
    # - round near-100 ratio to 100:
    f = ratio > 90
    ratio[f] = (ratio[f] / 10).round().astype(int) * 10  # round ratio to nearest 10
    # - now test
    f_100 = ratio == 100
    f_1 = ratio == 1
    self.assertTrue((f_100 | f_1).all())

    self.assertTrue("Repaired?" in df_repaired.columns)
    self.assertFalse(df_repaired["Repaired?"].isna().any())
|
||||
|
||||
def test_repair_zeroes_daily(self):
|
||||
tkr = "BBIL.L"
|
||||
dat = yf.Ticker(tkr, session=self.session)
|
||||
@@ -626,6 +787,42 @@ class TestPriceRepair(unittest.TestCase):
|
||||
self.assertTrue("Repaired?" in repaired_df.columns)
|
||||
self.assertFalse(repaired_df["Repaired?"].isna().any())
|
||||
|
||||
def test_repair_zeroes_daily_adjClose(self):
    """Check that a zeroed 'Adj Close' is reconstructed by the zero-repair.

    Test that 'Adj Close' is reconstructed correctly, particularly when a
    dividend occurred within 1 day. A five-row daily INTC frame with one
    dividend is hard-coded; every 3-row window of it is taken, each row's
    'Adj Close' is zeroed in turn, and the repaired 'Close'/'Adj Close'
    must match the untouched window within a relative tolerance.
    """
    tkr = "INTC"
    dates = [_dt.datetime(2023, 2, 8),
             _dt.datetime(2023, 2, 7),
             _dt.datetime(2023, 2, 6),
             _dt.datetime(2023, 2, 3),
             _dt.datetime(2023, 2, 2)]
    prices = {"Open":      [28.95, 28.65, 29.55, 29.62, 29.25],
              "High":      [29.12, 29.27, 29.65, 31.17, 30.30],
              "Low":       [28.21, 28.43, 28.61, 29.53, 28.80],
              "Close":     [28.24, 29.05, 28.69, 30.32, 30.19],
              "Adj Close": [28.12, 28.93, 28.57, 29.83, 29.70],
              "Volume":    [36e6, 51e6, 49e6, 58e6, 62e6],
              "Dividends": [0, 0, 0.365, 0, 0]}
    df = _pd.DataFrame(data=prices, index=_pd.to_datetime(dates))
    df = df.sort_index()
    df.index.name = "Date"
    dat = yf.Ticker(tkr, session=self.session)
    tz_exchange = dat.fast_info["timezone"]
    df.index = df.index.tz_localize(tz_exchange)

    rtol = 5e-3
    for window_start in range(3):
        good_window = df.iloc[window_start:window_start + 3]
        for row in range(3):
            # Corrupt exactly one 'Adj Close' value in a copy of the window.
            bad_window = good_window.copy()
            bad_window.loc[bad_window.index[row], "Adj Close"] = 0.0

            repaired = dat._fix_zeroes(bad_window, "1d", tz_exchange, prepost=False)
            for c in ["Close", "Adj Close"]:
                matches = _np.isclose(repaired[c], good_window[c], rtol=rtol)
                self.assertTrue(matches.all())
            self.assertTrue("Repaired?" in repaired.columns)
            self.assertFalse(repaired["Repaired?"].isna().any())
|
||||
|
||||
def test_repair_zeroes_hourly(self):
|
||||
tkr = "INTC"
|
||||
dat = yf.Ticker(tkr, session=self.session)
|
||||
@@ -660,5 +857,65 @@ class TestPriceRepair(unittest.TestCase):
|
||||
self.assertTrue("Repaired?" in repaired_df.columns)
|
||||
self.assertFalse(repaired_df["Repaired?"].isna().any())
|
||||
|
||||
def test_repair_bad_stock_split(self):
    """Check the bad-stock-split repair on known-bad and known-good data.

    Part 1: for tickers with a recorded bad split, the fixture
    '<TKR>-bad-stock-split.csv' is repaired and compared (relative
    tolerance 5e-6) with '<TKR>-bad-stock-split-fixed.csv'.
    Part 2: for tickers whose live data is expected to be fine, two years
    of history at several intervals is repaired and must be unchanged.
    """
    data_dir = os.path.join(os.path.dirname(__file__), "data")
    price_cols = ["Open", "Low", "High", "Close", "Adj Close", "Volume"]

    bad_tkrs = ['4063.T', 'ALPHA.PA', 'CNE.L', 'MOB.ST', 'SPM.MI']
    for tkr in bad_tkrs:
        dat = yf.Ticker(tkr, session=self.session)
        tz_exchange = dat.fast_info["timezone"]

        # Fixture file names use '-' in place of '.' in the ticker.
        fn_prefix = tkr.replace('.','-')
        df_bad = _pd.read_csv(os.path.join(data_dir, fn_prefix+"-bad-stock-split.csv"), index_col="Date")
        df_bad.index = _pd.to_datetime(df_bad.index)

        repaired_df = dat._fix_bad_stock_split(df_bad, "1d", tz_exchange)

        correct_df = _pd.read_csv(os.path.join(data_dir, fn_prefix+"-bad-stock-split-fixed.csv"), index_col="Date")
        correct_df.index = _pd.to_datetime(correct_df.index)

        repaired_df = repaired_df.sort_index()
        correct_df = correct_df.sort_index()
        for c in price_cols:
            try:
                self.assertTrue(_np.isclose(repaired_df[c], correct_df[c], rtol=5e-6).all())
            except Exception:
                # Dump diagnostics, then let the original failure propagate.
                print(f"tkr={tkr} COLUMN={c}")
                print("- repaired_df")
                print(repaired_df)
                print("- correct_df[c]:")
                print(correct_df[c])
                print("- diff:")
                print(repaired_df[c] - correct_df[c])
                raise

    # Stocks that split in 2022 but no problems in Yahoo data,
    # so repair should change nothing
    good_tkrs = ['AMZN', 'DXCM', 'FTNT', 'GOOG', 'GME', 'PANW', 'SHOP', 'TSLA']
    good_tkrs += ['AEI', 'CHRA', 'GHI', 'IRON', 'LXU', 'NUZE', 'RSLS', 'TISI']
    good_tkrs += ['BOL.ST', 'TUI1.DE']
    intervals = ['1d', '1wk', '1mo', '3mo']
    for tkr in good_tkrs:
        for interval in intervals:
            dat = yf.Ticker(tkr, session=self.session)
            tz_exchange = dat.fast_info["timezone"]

            df_good = dat.history(period='2y', interval=interval, auto_adjust=False)

            repaired_df = dat._fix_bad_stock_split(df_good, interval, tz_exchange)

            # Expect no change from repair
            df_good = df_good.sort_index()
            repaired_df = repaired_df.sort_index()
            for c in price_cols:
                try:
                    self.assertTrue((repaired_df[c].to_numpy() == df_good[c].to_numpy()).all())
                except Exception:
                    # Show the differing rows together with their neighbours.
                    print(f"tkr={tkr} interval={interval} COLUMN={c}")
                    df_dbg = df_good[[c]].join(repaired_df[[c]], lsuffix='.good', rsuffix='.repaired')
                    f_diff = repaired_df[c].to_numpy() != df_good[c].to_numpy()
                    print(df_dbg[f_diff | _np.roll(f_diff, 1) | _np.roll(f_diff, -1)])
                    raise
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
465
tests/ticker.py
465
tests/ticker.py
@@ -71,19 +71,20 @@ class TestTicker(unittest.TestCase):
|
||||
dat.news
|
||||
dat.earnings_dates
|
||||
|
||||
# These require decryption which is broken:
|
||||
dat.income_stmt
|
||||
dat.quarterly_income_stmt
|
||||
dat.balance_sheet
|
||||
dat.quarterly_balance_sheet
|
||||
dat.cashflow
|
||||
dat.quarterly_cashflow
|
||||
|
||||
# These haven't been ported Yahoo API
|
||||
# dat.shares
|
||||
# dat.info
|
||||
# dat.calendar
|
||||
# dat.recommendations
|
||||
# dat.earnings
|
||||
# dat.quarterly_earnings
|
||||
# dat.income_stmt
|
||||
# dat.quarterly_income_stmt
|
||||
# dat.balance_sheet
|
||||
# dat.quarterly_balance_sheet
|
||||
# dat.cashflow
|
||||
# dat.quarterly_cashflow
|
||||
# dat.recommendations_summary
|
||||
# dat.analyst_price_target
|
||||
# dat.revenue_forecasts
|
||||
@@ -122,6 +123,13 @@ class TestTicker(unittest.TestCase):
|
||||
dat.news
|
||||
dat.earnings_dates
|
||||
|
||||
dat.income_stmt
|
||||
dat.quarterly_income_stmt
|
||||
dat.balance_sheet
|
||||
dat.quarterly_balance_sheet
|
||||
dat.cashflow
|
||||
dat.quarterly_cashflow
|
||||
|
||||
# These require decryption which is broken:
|
||||
# dat.shares
|
||||
# dat.info
|
||||
@@ -129,12 +137,6 @@ class TestTicker(unittest.TestCase):
|
||||
# dat.recommendations
|
||||
# dat.earnings
|
||||
# dat.quarterly_earnings
|
||||
# dat.income_stmt
|
||||
# dat.quarterly_income_stmt
|
||||
# dat.balance_sheet
|
||||
# dat.quarterly_balance_sheet
|
||||
# dat.cashflow
|
||||
# dat.quarterly_cashflow
|
||||
# dat.recommendations_summary
|
||||
# dat.analyst_price_target
|
||||
# dat.revenue_forecasts
|
||||
@@ -211,7 +213,7 @@ class TestTickerHistory(unittest.TestCase):
|
||||
self.assertFalse(data.empty, "data is empty")
|
||||
|
||||
|
||||
# Below will fail because decryption broken
|
||||
# Below will fail because not ported to Yahoo API
|
||||
# class TestTickerEarnings(unittest.TestCase):
|
||||
# session = None
|
||||
|
||||
@@ -367,270 +369,243 @@ class TestTickerMiscFinancials(unittest.TestCase):
|
||||
self.assertIsInstance(data, pd.Series, "data has wrong type")
|
||||
self.assertFalse(data.empty, "data is empty")
|
||||
|
||||
# Below will fail because decryption broken
|
||||
def test_income_statement(self):
|
||||
expected_keys = ["Total Revenue", "Basic EPS"]
|
||||
expected_periods_days = 365
|
||||
|
||||
# def test_income_statement(self):
|
||||
# expected_keys = ["Total Revenue", "Basic EPS"]
|
||||
# expected_periods_days = 365
|
||||
# Test contents of table
|
||||
data = self.ticker.get_income_stmt(pretty=True)
|
||||
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
|
||||
self.assertFalse(data.empty, "data is empty")
|
||||
for k in expected_keys:
|
||||
self.assertIn(k, data.index, "Did not find expected row in index")
|
||||
period = abs((data.columns[0]-data.columns[1]).days)
|
||||
self.assertLess(abs(period-expected_periods_days), 20, "Not returning annual financials")
|
||||
|
||||
# # Test contents of table
|
||||
# data = self.ticker.get_income_stmt(pretty=True)
|
||||
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
|
||||
# self.assertFalse(data.empty, "data is empty")
|
||||
# for k in expected_keys:
|
||||
# self.assertIn(k, data.index, "Did not find expected row in index")
|
||||
# period = abs((data.columns[0]-data.columns[1]).days)
|
||||
# self.assertLess(abs(period-expected_periods_days), 20, "Not returning annual financials")
|
||||
# Test property defaults
|
||||
data2 = self.ticker.income_stmt
|
||||
self.assertTrue(data.equals(data2), "property not defaulting to 'pretty=True'")
|
||||
|
||||
# # Test property defaults
|
||||
# data2 = self.ticker.income_stmt
|
||||
# self.assertTrue(data.equals(data2), "property not defaulting to 'pretty=True'")
|
||||
# Test pretty=False
|
||||
expected_keys = [k.replace(' ', '') for k in expected_keys]
|
||||
data = self.ticker.get_income_stmt(pretty=False)
|
||||
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
|
||||
self.assertFalse(data.empty, "data is empty")
|
||||
for k in expected_keys:
|
||||
self.assertIn(k, data.index, "Did not find expected row in index")
|
||||
|
||||
# # Test pretty=False
|
||||
# expected_keys = [k.replace(' ', '') for k in expected_keys]
|
||||
# data = self.ticker.get_income_stmt(pretty=False)
|
||||
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
|
||||
# self.assertFalse(data.empty, "data is empty")
|
||||
# for k in expected_keys:
|
||||
# self.assertIn(k, data.index, "Did not find expected row in index")
|
||||
# Test to_dict
|
||||
data = self.ticker.get_income_stmt(as_dict=True)
|
||||
self.assertIsInstance(data, dict, "data has wrong type")
|
||||
|
||||
# # Test to_dict
|
||||
# data = self.ticker.get_income_stmt(as_dict=True)
|
||||
# self.assertIsInstance(data, dict, "data has wrong type")
|
||||
def test_quarterly_income_statement(self):
|
||||
expected_keys = ["Total Revenue", "Basic EPS"]
|
||||
expected_periods_days = 365//4
|
||||
|
||||
# def test_quarterly_income_statement(self):
|
||||
# expected_keys = ["Total Revenue", "Basic EPS"]
|
||||
# expected_periods_days = 365//4
|
||||
# Test contents of table
|
||||
data = self.ticker.get_income_stmt(pretty=True, freq="quarterly")
|
||||
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
|
||||
self.assertFalse(data.empty, "data is empty")
|
||||
for k in expected_keys:
|
||||
self.assertIn(k, data.index, "Did not find expected row in index")
|
||||
period = abs((data.columns[0]-data.columns[1]).days)
|
||||
self.assertLess(abs(period-expected_periods_days), 20, "Not returning quarterly financials")
|
||||
|
||||
# # Test contents of table
|
||||
# data = self.ticker.get_income_stmt(pretty=True, freq="quarterly")
|
||||
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
|
||||
# self.assertFalse(data.empty, "data is empty")
|
||||
# for k in expected_keys:
|
||||
# self.assertIn(k, data.index, "Did not find expected row in index")
|
||||
# period = abs((data.columns[0]-data.columns[1]).days)
|
||||
# self.assertLess(abs(period-expected_periods_days), 20, "Not returning quarterly financials")
|
||||
# Test property defaults
|
||||
data2 = self.ticker.quarterly_income_stmt
|
||||
self.assertTrue(data.equals(data2), "property not defaulting to 'pretty=True'")
|
||||
|
||||
# # Test property defaults
|
||||
# data2 = self.ticker.quarterly_income_stmt
|
||||
# self.assertTrue(data.equals(data2), "property not defaulting to 'pretty=True'")
|
||||
# Test pretty=False
|
||||
expected_keys = [k.replace(' ', '') for k in expected_keys]
|
||||
data = self.ticker.get_income_stmt(pretty=False, freq="quarterly")
|
||||
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
|
||||
self.assertFalse(data.empty, "data is empty")
|
||||
for k in expected_keys:
|
||||
self.assertIn(k, data.index, "Did not find expected row in index")
|
||||
|
||||
# # Test pretty=False
|
||||
# expected_keys = [k.replace(' ', '') for k in expected_keys]
|
||||
# data = self.ticker.get_income_stmt(pretty=False, freq="quarterly")
|
||||
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
|
||||
# self.assertFalse(data.empty, "data is empty")
|
||||
# for k in expected_keys:
|
||||
# self.assertIn(k, data.index, "Did not find expected row in index")
|
||||
# Test to_dict
|
||||
data = self.ticker.get_income_stmt(as_dict=True)
|
||||
self.assertIsInstance(data, dict, "data has wrong type")
|
||||
|
||||
# # Test to_dict
|
||||
# data = self.ticker.get_income_stmt(as_dict=True)
|
||||
# self.assertIsInstance(data, dict, "data has wrong type")
|
||||
def test_balance_sheet(self):
|
||||
expected_keys = ["Total Assets", "Net PPE"]
|
||||
expected_periods_days = 365
|
||||
|
||||
# def test_quarterly_income_statement_old_fmt(self):
|
||||
# expected_row = "TotalRevenue"
|
||||
# data = self.ticker_old_fmt.get_income_stmt(freq="quarterly", legacy=True)
|
||||
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
|
||||
# self.assertFalse(data.empty, "data is empty")
|
||||
# self.assertIn(expected_row, data.index, "Did not find expected row in index")
|
||||
# Test contents of table
|
||||
data = self.ticker.get_balance_sheet(pretty=True)
|
||||
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
|
||||
self.assertFalse(data.empty, "data is empty")
|
||||
for k in expected_keys:
|
||||
self.assertIn(k, data.index, "Did not find expected row in index")
|
||||
period = abs((data.columns[0]-data.columns[1]).days)
|
||||
self.assertLess(abs(period-expected_periods_days), 20, "Not returning annual financials")
|
||||
|
||||
# data_cached = self.ticker_old_fmt.get_income_stmt(freq="quarterly", legacy=True)
|
||||
# self.assertIs(data, data_cached, "data not cached")
|
||||
# Test property defaults
|
||||
data2 = self.ticker.balance_sheet
|
||||
self.assertTrue(data.equals(data2), "property not defaulting to 'pretty=True'")
|
||||
|
||||
# def test_balance_sheet(self):
|
||||
# expected_keys = ["Total Assets", "Net PPE"]
|
||||
# expected_periods_days = 365
|
||||
# Test pretty=False
|
||||
expected_keys = [k.replace(' ', '') for k in expected_keys]
|
||||
data = self.ticker.get_balance_sheet(pretty=False)
|
||||
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
|
||||
self.assertFalse(data.empty, "data is empty")
|
||||
for k in expected_keys:
|
||||
self.assertIn(k, data.index, "Did not find expected row in index")
|
||||
|
||||
# # Test contents of table
|
||||
# data = self.ticker.get_balance_sheet(pretty=True)
|
||||
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
|
||||
# self.assertFalse(data.empty, "data is empty")
|
||||
# for k in expected_keys:
|
||||
# self.assertIn(k, data.index, "Did not find expected row in index")
|
||||
# period = abs((data.columns[0]-data.columns[1]).days)
|
||||
# self.assertLess(abs(period-expected_periods_days), 20, "Not returning annual financials")
|
||||
# Test to_dict
|
||||
data = self.ticker.get_balance_sheet(as_dict=True)
|
||||
self.assertIsInstance(data, dict, "data has wrong type")
|
||||
|
||||
# # Test property defaults
|
||||
# data2 = self.ticker.balance_sheet
|
||||
# self.assertTrue(data.equals(data2), "property not defaulting to 'pretty=True'")
|
||||
def test_quarterly_balance_sheet(self):
|
||||
expected_keys = ["Total Assets", "Net PPE"]
|
||||
expected_periods_days = 365//4
|
||||
|
||||
# # Test pretty=False
|
||||
# expected_keys = [k.replace(' ', '') for k in expected_keys]
|
||||
# data = self.ticker.get_balance_sheet(pretty=False)
|
||||
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
|
||||
# self.assertFalse(data.empty, "data is empty")
|
||||
# for k in expected_keys:
|
||||
# self.assertIn(k, data.index, "Did not find expected row in index")
|
||||
# Test contents of table
|
||||
data = self.ticker.get_balance_sheet(pretty=True, freq="quarterly")
|
||||
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
|
||||
self.assertFalse(data.empty, "data is empty")
|
||||
for k in expected_keys:
|
||||
self.assertIn(k, data.index, "Did not find expected row in index")
|
||||
period = abs((data.columns[0]-data.columns[1]).days)
|
||||
self.assertLess(abs(period-expected_periods_days), 20, "Not returning quarterly financials")
|
||||
|
||||
# # Test to_dict
|
||||
# data = self.ticker.get_balance_sheet(as_dict=True)
|
||||
# self.assertIsInstance(data, dict, "data has wrong type")
|
||||
# Test property defaults
|
||||
data2 = self.ticker.quarterly_balance_sheet
|
||||
self.assertTrue(data.equals(data2), "property not defaulting to 'pretty=True'")
|
||||
|
||||
# def test_quarterly_balance_sheet(self):
|
||||
# expected_keys = ["Total Assets", "Net PPE"]
|
||||
# expected_periods_days = 365//4
|
||||
# Test pretty=False
|
||||
expected_keys = [k.replace(' ', '') for k in expected_keys]
|
||||
data = self.ticker.get_balance_sheet(pretty=False, freq="quarterly")
|
||||
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
|
||||
self.assertFalse(data.empty, "data is empty")
|
||||
for k in expected_keys:
|
||||
self.assertIn(k, data.index, "Did not find expected row in index")
|
||||
|
||||
# # Test contents of table
|
||||
# data = self.ticker.get_balance_sheet(pretty=True, freq="quarterly")
|
||||
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
|
||||
# self.assertFalse(data.empty, "data is empty")
|
||||
# for k in expected_keys:
|
||||
# self.assertIn(k, data.index, "Did not find expected row in index")
|
||||
# period = abs((data.columns[0]-data.columns[1]).days)
|
||||
# self.assertLess(abs(period-expected_periods_days), 20, "Not returning quarterly financials")
|
||||
# Test to_dict
|
||||
data = self.ticker.get_balance_sheet(as_dict=True, freq="quarterly")
|
||||
self.assertIsInstance(data, dict, "data has wrong type")
|
||||
|
||||
# # Test property defaults
|
||||
# data2 = self.ticker.quarterly_balance_sheet
|
||||
# self.assertTrue(data.equals(data2), "property not defaulting to 'pretty=True'")
|
||||
def test_cash_flow(self):
|
||||
expected_keys = ["Operating Cash Flow", "Net PPE Purchase And Sale"]
|
||||
expected_periods_days = 365
|
||||
|
||||
# # Test pretty=False
|
||||
# expected_keys = [k.replace(' ', '') for k in expected_keys]
|
||||
# data = self.ticker.get_balance_sheet(pretty=False, freq="quarterly")
|
||||
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
|
||||
# self.assertFalse(data.empty, "data is empty")
|
||||
# for k in expected_keys:
|
||||
# self.assertIn(k, data.index, "Did not find expected row in index")
|
||||
# Test contents of table
|
||||
data = self.ticker.get_cashflow(pretty=True)
|
||||
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
|
||||
self.assertFalse(data.empty, "data is empty")
|
||||
for k in expected_keys:
|
||||
self.assertIn(k, data.index, "Did not find expected row in index")
|
||||
period = abs((data.columns[0]-data.columns[1]).days)
|
||||
self.assertLess(abs(period-expected_periods_days), 20, "Not returning annual financials")
|
||||
|
||||
# # Test to_dict
|
||||
# data = self.ticker.get_balance_sheet(as_dict=True, freq="quarterly")
|
||||
# self.assertIsInstance(data, dict, "data has wrong type")
|
||||
# Test property defaults
|
||||
data2 = self.ticker.cashflow
|
||||
self.assertTrue(data.equals(data2), "property not defaulting to 'pretty=True'")
|
||||
|
||||
# def test_quarterly_balance_sheet_old_fmt(self):
|
||||
# expected_row = "TotalAssets"
|
||||
# data = self.ticker_old_fmt.get_balance_sheet(freq="quarterly", legacy=True)
|
||||
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
|
||||
# self.assertFalse(data.empty, "data is empty")
|
||||
# self.assertIn(expected_row, data.index, "Did not find expected row in index")
|
||||
# Test pretty=False
|
||||
expected_keys = [k.replace(' ', '') for k in expected_keys]
|
||||
data = self.ticker.get_cashflow(pretty=False)
|
||||
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
|
||||
self.assertFalse(data.empty, "data is empty")
|
||||
for k in expected_keys:
|
||||
self.assertIn(k, data.index, "Did not find expected row in index")
|
||||
|
||||
# data_cached = self.ticker_old_fmt.get_balance_sheet(freq="quarterly", legacy=True)
|
||||
# self.assertIs(data, data_cached, "data not cached")
|
||||
# Test to_dict
|
||||
data = self.ticker.get_cashflow(as_dict=True)
|
||||
self.assertIsInstance(data, dict, "data has wrong type")
|
||||
|
||||
# def test_cash_flow(self):
|
||||
# expected_keys = ["Operating Cash Flow", "Net PPE Purchase And Sale"]
|
||||
# expected_periods_days = 365
|
||||
def test_quarterly_cash_flow(self):
|
||||
expected_keys = ["Operating Cash Flow", "Net PPE Purchase And Sale"]
|
||||
expected_periods_days = 365//4
|
||||
|
||||
# # Test contents of table
|
||||
# data = self.ticker.get_cashflow(pretty=True)
|
||||
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
|
||||
# self.assertFalse(data.empty, "data is empty")
|
||||
# for k in expected_keys:
|
||||
# self.assertIn(k, data.index, "Did not find expected row in index")
|
||||
# period = abs((data.columns[0]-data.columns[1]).days)
|
||||
# self.assertLess(abs(period-expected_periods_days), 20, "Not returning annual financials")
|
||||
# Test contents of table
|
||||
data = self.ticker.get_cashflow(pretty=True, freq="quarterly")
|
||||
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
|
||||
self.assertFalse(data.empty, "data is empty")
|
||||
for k in expected_keys:
|
||||
self.assertIn(k, data.index, "Did not find expected row in index")
|
||||
period = abs((data.columns[0]-data.columns[1]).days)
|
||||
self.assertLess(abs(period-expected_periods_days), 20, "Not returning quarterly financials")
|
||||
|
||||
# # Test property defaults
|
||||
# data2 = self.ticker.cashflow
|
||||
# self.assertTrue(data.equals(data2), "property not defaulting to 'pretty=True'")
|
||||
# Test property defaults
|
||||
data2 = self.ticker.quarterly_cashflow
|
||||
self.assertTrue(data.equals(data2), "property not defaulting to 'pretty=True'")
|
||||
|
||||
# # Test pretty=False
|
||||
# expected_keys = [k.replace(' ', '') for k in expected_keys]
|
||||
# data = self.ticker.get_cashflow(pretty=False)
|
||||
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
|
||||
# self.assertFalse(data.empty, "data is empty")
|
||||
# for k in expected_keys:
|
||||
# self.assertIn(k, data.index, "Did not find expected row in index")
|
||||
# Test pretty=False
|
||||
expected_keys = [k.replace(' ', '') for k in expected_keys]
|
||||
data = self.ticker.get_cashflow(pretty=False, freq="quarterly")
|
||||
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
|
||||
self.assertFalse(data.empty, "data is empty")
|
||||
for k in expected_keys:
|
||||
self.assertIn(k, data.index, "Did not find expected row in index")
|
||||
|
||||
# # Test to_dict
|
||||
# data = self.ticker.get_cashflow(as_dict=True)
|
||||
# self.assertIsInstance(data, dict, "data has wrong type")
|
||||
# Test to_dict
|
||||
data = self.ticker.get_cashflow(as_dict=True)
|
||||
self.assertIsInstance(data, dict, "data has wrong type")
|
||||
|
||||
# def test_quarterly_cash_flow(self):
|
||||
# expected_keys = ["Operating Cash Flow", "Net PPE Purchase And Sale"]
|
||||
# expected_periods_days = 365//4
|
||||
def test_income_alt_names(self):
|
||||
i1 = self.ticker.income_stmt
|
||||
i2 = self.ticker.incomestmt
|
||||
self.assertTrue(i1.equals(i2))
|
||||
i3 = self.ticker.financials
|
||||
self.assertTrue(i1.equals(i3))
|
||||
|
||||
# # Test contents of table
|
||||
# data = self.ticker.get_cashflow(pretty=True, freq="quarterly")
|
||||
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
|
||||
# self.assertFalse(data.empty, "data is empty")
|
||||
# for k in expected_keys:
|
||||
# self.assertIn(k, data.index, "Did not find expected row in index")
|
||||
# period = abs((data.columns[0]-data.columns[1]).days)
|
||||
# self.assertLess(abs(period-expected_periods_days), 20, "Not returning quarterly financials")
|
||||
i1 = self.ticker.get_income_stmt()
|
||||
i2 = self.ticker.get_incomestmt()
|
||||
self.assertTrue(i1.equals(i2))
|
||||
i3 = self.ticker.get_financials()
|
||||
self.assertTrue(i1.equals(i3))
|
||||
|
||||
# # Test property defaults
|
||||
# data2 = self.ticker.quarterly_cashflow
|
||||
# self.assertTrue(data.equals(data2), "property not defaulting to 'pretty=True'")
|
||||
i1 = self.ticker.quarterly_income_stmt
|
||||
i2 = self.ticker.quarterly_incomestmt
|
||||
self.assertTrue(i1.equals(i2))
|
||||
i3 = self.ticker.quarterly_financials
|
||||
self.assertTrue(i1.equals(i3))
|
||||
|
||||
# # Test pretty=False
|
||||
# expected_keys = [k.replace(' ', '') for k in expected_keys]
|
||||
# data = self.ticker.get_cashflow(pretty=False, freq="quarterly")
|
||||
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
|
||||
# self.assertFalse(data.empty, "data is empty")
|
||||
# for k in expected_keys:
|
||||
# self.assertIn(k, data.index, "Did not find expected row in index")
|
||||
i1 = self.ticker.get_income_stmt(freq="quarterly")
|
||||
i2 = self.ticker.get_incomestmt(freq="quarterly")
|
||||
self.assertTrue(i1.equals(i2))
|
||||
i3 = self.ticker.get_financials(freq="quarterly")
|
||||
self.assertTrue(i1.equals(i3))
|
||||
|
||||
# # Test to_dict
|
||||
# data = self.ticker.get_cashflow(as_dict=True)
|
||||
# self.assertIsInstance(data, dict, "data has wrong type")
|
||||
def test_balance_sheet_alt_names(self):
|
||||
i1 = self.ticker.balance_sheet
|
||||
i2 = self.ticker.balancesheet
|
||||
self.assertTrue(i1.equals(i2))
|
||||
|
||||
# def test_quarterly_cashflow_old_fmt(self):
|
||||
# expected_row = "NetIncome"
|
||||
# data = self.ticker_old_fmt.get_cashflow(legacy=True, freq="quarterly")
|
||||
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
|
||||
# self.assertFalse(data.empty, "data is empty")
|
||||
# self.assertIn(expected_row, data.index, "Did not find expected row in index")
|
||||
i1 = self.ticker.get_balance_sheet()
|
||||
i2 = self.ticker.get_balancesheet()
|
||||
self.assertTrue(i1.equals(i2))
|
||||
|
||||
# data_cached = self.ticker_old_fmt.get_cashflow(legacy=True, freq="quarterly")
|
||||
# self.assertIs(data, data_cached, "data not cached")
|
||||
i1 = self.ticker.quarterly_balance_sheet
|
||||
i2 = self.ticker.quarterly_balancesheet
|
||||
self.assertTrue(i1.equals(i2))
|
||||
|
||||
# def test_income_alt_names(self):
|
||||
# i1 = self.ticker.income_stmt
|
||||
# i2 = self.ticker.incomestmt
|
||||
# self.assertTrue(i1.equals(i2))
|
||||
# i3 = self.ticker.financials
|
||||
# self.assertTrue(i1.equals(i3))
|
||||
i1 = self.ticker.get_balance_sheet(freq="quarterly")
|
||||
i2 = self.ticker.get_balancesheet(freq="quarterly")
|
||||
self.assertTrue(i1.equals(i2))
|
||||
|
||||
# i1 = self.ticker.get_income_stmt()
|
||||
# i2 = self.ticker.get_incomestmt()
|
||||
# self.assertTrue(i1.equals(i2))
|
||||
# i3 = self.ticker.get_financials()
|
||||
# self.assertTrue(i1.equals(i3))
|
||||
def test_cash_flow_alt_names(self):
|
||||
i1 = self.ticker.cash_flow
|
||||
i2 = self.ticker.cashflow
|
||||
self.assertTrue(i1.equals(i2))
|
||||
|
||||
# i1 = self.ticker.quarterly_income_stmt
|
||||
# i2 = self.ticker.quarterly_incomestmt
|
||||
# self.assertTrue(i1.equals(i2))
|
||||
# i3 = self.ticker.quarterly_financials
|
||||
# self.assertTrue(i1.equals(i3))
|
||||
i1 = self.ticker.get_cash_flow()
|
||||
i2 = self.ticker.get_cashflow()
|
||||
self.assertTrue(i1.equals(i2))
|
||||
|
||||
# i1 = self.ticker.get_income_stmt(freq="quarterly")
|
||||
# i2 = self.ticker.get_incomestmt(freq="quarterly")
|
||||
# self.assertTrue(i1.equals(i2))
|
||||
# i3 = self.ticker.get_financials(freq="quarterly")
|
||||
# self.assertTrue(i1.equals(i3))
|
||||
i1 = self.ticker.quarterly_cash_flow
|
||||
i2 = self.ticker.quarterly_cashflow
|
||||
self.assertTrue(i1.equals(i2))
|
||||
|
||||
# def test_balance_sheet_alt_names(self):
|
||||
# i1 = self.ticker.balance_sheet
|
||||
# i2 = self.ticker.balancesheet
|
||||
# self.assertTrue(i1.equals(i2))
|
||||
i1 = self.ticker.get_cash_flow(freq="quarterly")
|
||||
i2 = self.ticker.get_cashflow(freq="quarterly")
|
||||
self.assertTrue(i1.equals(i2))
|
||||
|
||||
# i1 = self.ticker.get_balance_sheet()
|
||||
# i2 = self.ticker.get_balancesheet()
|
||||
# self.assertTrue(i1.equals(i2))
|
||||
def test_bad_freq_value_raises_exception(self):
|
||||
self.assertRaises(ValueError, lambda: self.ticker.get_cashflow(freq="badarg"))
|
||||
|
||||
# i1 = self.ticker.quarterly_balance_sheet
|
||||
# i2 = self.ticker.quarterly_balancesheet
|
||||
# self.assertTrue(i1.equals(i2))
|
||||
|
||||
# i1 = self.ticker.get_balance_sheet(freq="quarterly")
|
||||
# i2 = self.ticker.get_balancesheet(freq="quarterly")
|
||||
# self.assertTrue(i1.equals(i2))
|
||||
|
||||
# def test_cash_flow_alt_names(self):
|
||||
# i1 = self.ticker.cash_flow
|
||||
# i2 = self.ticker.cashflow
|
||||
# self.assertTrue(i1.equals(i2))
|
||||
|
||||
# i1 = self.ticker.get_cash_flow()
|
||||
# i2 = self.ticker.get_cashflow()
|
||||
# self.assertTrue(i1.equals(i2))
|
||||
|
||||
# i1 = self.ticker.quarterly_cash_flow
|
||||
# i2 = self.ticker.quarterly_cashflow
|
||||
# self.assertTrue(i1.equals(i2))
|
||||
|
||||
# i1 = self.ticker.get_cash_flow(freq="quarterly")
|
||||
# i2 = self.ticker.get_cashflow(freq="quarterly")
|
||||
# self.assertTrue(i1.equals(i2))
|
||||
# Below will fail because not ported to Yahoo API
|
||||
|
||||
# def test_sustainability(self):
|
||||
# data = self.ticker.sustainability
|
||||
@@ -685,9 +660,6 @@ class TestTickerMiscFinancials(unittest.TestCase):
|
||||
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
|
||||
# self.assertFalse(data.empty, "data is empty")
|
||||
|
||||
# def test_bad_freq_value_raises_exception(self):
|
||||
# self.assertRaises(ValueError, lambda: self.ticker.get_cashflow(freq="badarg"))
|
||||
|
||||
|
||||
class TestTickerInfo(unittest.TestCase):
|
||||
session = None
|
||||
@@ -717,17 +689,16 @@ class TestTickerInfo(unittest.TestCase):
|
||||
for k in f:
|
||||
self.assertIsNotNone(f[k])
|
||||
|
||||
# Below will fail because decryption broken
|
||||
|
||||
# def test_info(self):
|
||||
# data = self.tickers[0].info
|
||||
# self.assertIsInstance(data, dict, "data has wrong type")
|
||||
# self.assertIn("symbol", data.keys(), "Did not find expected key in info dict")
|
||||
# self.assertEqual(self.symbols[0], data["symbol"], "Wrong symbol value in info dict")
|
||||
def test_info(self):
|
||||
data = self.tickers[0].info
|
||||
self.assertIsInstance(data, dict, "data has wrong type")
|
||||
expected_keys = ['industry', 'currentPrice', 'exchange', 'floatShares', 'companyOfficers', 'bid']
|
||||
for k in expected_keys:
|
||||
print(k)
|
||||
self.assertIn("symbol", data.keys(), f"Did not find expected key '{k}' in info dict")
|
||||
self.assertEqual(self.symbols[0], data["symbol"], "Wrong symbol value in info dict")
|
||||
|
||||
# def test_fast_info_matches_info(self):
|
||||
# yf.scrapers.quote.PRUNE_INFO = False
|
||||
|
||||
# fast_info_keys = set()
|
||||
# for ticker in self.tickers:
|
||||
# fast_info_keys.update(set(ticker.fast_info.keys()))
|
||||
|
||||
@@ -23,7 +23,7 @@ from . import version
|
||||
from .ticker import Ticker
|
||||
from .tickers import Tickers
|
||||
from .multi import download
|
||||
from .utils import set_tz_cache_location
|
||||
from .utils import set_tz_cache_location, enable_debug_mode
|
||||
|
||||
__version__ = version.version
|
||||
__author__ = "Ran Aroussi"
|
||||
@@ -43,4 +43,4 @@ def pdr_override():
|
||||
pass
|
||||
|
||||
|
||||
__all__ = ['download', 'Ticker', 'Tickers', 'pdr_override', 'set_tz_cache_location']
|
||||
__all__ = ['download', 'Ticker', 'Tickers', 'pdr_override', 'enable_debug_mode', 'set_tz_cache_location']
|
||||
|
||||
535
yfinance/base.py
535
yfinance/base.py
@@ -45,10 +45,8 @@ from .scrapers.quote import Quote, FastInfo
|
||||
import json as _json
|
||||
|
||||
import logging
|
||||
logger = utils.get_yf_logger()
|
||||
|
||||
_BASE_URL_ = 'https://query2.finance.yahoo.com'
|
||||
_SCRAPE_URL_ = 'https://finance.yahoo.com/quote'
|
||||
_ROOT_URL_ = 'https://finance.yahoo.com'
|
||||
|
||||
class TickerBase:
|
||||
@@ -59,7 +57,6 @@ class TickerBase:
|
||||
self._history_metadata = None
|
||||
self._history_metadata_formatted = False
|
||||
self._base_url = _BASE_URL_
|
||||
self._scrape_url = _SCRAPE_URL_
|
||||
self._tz = None
|
||||
|
||||
self._isin = None
|
||||
@@ -84,13 +81,10 @@ class TickerBase:
|
||||
|
||||
self._fast_info = None
|
||||
|
||||
def stats(self, proxy=None):
|
||||
ticker_url = "{}/{}".format(self._scrape_url, self.ticker)
|
||||
|
||||
# get info and sustainability
|
||||
data = self._data.get_json_data_stores(proxy=proxy)["QuoteSummaryStore"]
|
||||
return data
|
||||
# Limit recursion depth when repairing prices
|
||||
self._reconstruct_start_interval = None
|
||||
|
||||
@utils.log_indent_decorator
|
||||
def history(self, period="1mo", interval="1d",
|
||||
start=None, end=None, prepost=False, actions=True,
|
||||
auto_adjust=True, back_adjust=False, repair=False, keepna=False,
|
||||
@@ -142,6 +136,7 @@ class TickerBase:
|
||||
raise_errors: bool
|
||||
If True, then raise errors as Exceptions instead of logging.
|
||||
"""
|
||||
logger = utils.get_yf_logger()
|
||||
|
||||
if debug is not None:
|
||||
if debug:
|
||||
@@ -412,6 +407,9 @@ class TickerBase:
|
||||
logger.debug(f'{self.ticker}: checking OHLC for repairs ...')
|
||||
df = self._fix_zeroes(df, interval, tz_exchange, prepost, silent=(repair=="silent"))
|
||||
df = self._fix_unit_mixups(df, interval, tz_exchange, prepost, silent=(repair=="silent"))
|
||||
df = self._fix_missing_div_adjust(df, interval, tz_exchange)
|
||||
df = self._fix_bad_stock_split(df, interval, tz_exchange)
|
||||
df = df.sort_index()
|
||||
|
||||
# Auto/back adjust
|
||||
try:
|
||||
@@ -456,15 +454,17 @@ class TickerBase:
|
||||
|
||||
# ------------------------
|
||||
|
||||
@utils.log_indent_decorator
|
||||
def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1, silent=False):
|
||||
# Reconstruct values in df using finer-grained price data. Delimiter marks what to reconstruct
|
||||
logger = utils.get_yf_logger()
|
||||
|
||||
if not isinstance(df, _pd.DataFrame):
|
||||
raise Exception("'df' must be a Pandas DataFrame not", type(df))
|
||||
if interval == "1m":
|
||||
# Can't go smaller than 1m so can't reconstruct
|
||||
return df
|
||||
|
||||
# Reconstruct values in df using finer-grained price data. Delimiter marks what to reconstruct
|
||||
|
||||
if interval[1:] in ['d', 'wk', 'mo']:
|
||||
# Interday data always includes pre & post
|
||||
prepost = True
|
||||
@@ -493,6 +493,13 @@ class TickerBase:
|
||||
df["Repaired?"] = False
|
||||
return df
|
||||
|
||||
# Limit max reconstruction depth to 2:
|
||||
if self._reconstruct_start_interval is None:
|
||||
self._reconstruct_start_interval = interval
|
||||
if interval not in [self._reconstruct_start_interval, nexts[self._reconstruct_start_interval]]:
|
||||
logger.debug(f"{self.ticker}: Price repair has hit max depth of 2 ('%s'->'%s'->'%s')", self._reconstruct_start_interval, interval, sub_interval)
|
||||
return df
|
||||
|
||||
df = df.sort_index()
|
||||
|
||||
f_repair = df[data_cols].to_numpy()==tag
|
||||
@@ -586,8 +593,7 @@ class TickerBase:
|
||||
n_fixed = 0
|
||||
for g in dts_groups:
|
||||
df_block = df[df.index.isin(g)]
|
||||
logger.debug("df_block:")
|
||||
logger.debug(df_block)
|
||||
logger.debug("df_block:\n" + str(df_block))
|
||||
|
||||
start_dt = g[0]
|
||||
start_d = start_dt.date()
|
||||
@@ -630,7 +636,7 @@ class TickerBase:
|
||||
fetch_start = max(min_dt.date(), fetch_start)
|
||||
logger.debug(f"Fetching {sub_interval} prepost={prepost} {fetch_start}->{fetch_end}")
|
||||
r = "silent" if silent else True
|
||||
df_fine = self.history(start=fetch_start, end=fetch_end, interval=sub_interval, auto_adjust=False, actions=False, prepost=prepost, repair=r, keepna=True)
|
||||
df_fine = self.history(start=fetch_start, end=fetch_end, interval=sub_interval, auto_adjust=False, actions=True, prepost=prepost, repair=r, keepna=True)
|
||||
if df_fine is None or df_fine.empty:
|
||||
if not silent:
|
||||
msg = f"Cannot reconstruct {interval} block starting"
|
||||
@@ -668,7 +674,7 @@ class TickerBase:
|
||||
df_fine["intervalID"] = df_fine["ctr"].cumsum()
|
||||
df_fine = df_fine.drop("ctr", axis=1)
|
||||
grp_col = "intervalID"
|
||||
df_fine = df_fine[~df_fine[price_cols].isna().all(axis=1)]
|
||||
df_fine = df_fine[~df_fine[price_cols+['Dividends']].isna().all(axis=1)]
|
||||
|
||||
df_fine_grp = df_fine.groupby(grp_col)
|
||||
df_new = df_fine_grp.agg(
|
||||
@@ -677,6 +683,7 @@ class TickerBase:
|
||||
AdjClose=("Adj Close", "last"),
|
||||
Low=("Low", "min"),
|
||||
High=("High", "max"),
|
||||
Dividends=("Dividends", "sum"),
|
||||
Volume=("Volume", "sum")).rename(columns={"AdjClose":"Adj Close"})
|
||||
if grp_col in ["Week Start", "Day Start"]:
|
||||
df_new.index = df_new.index.tz_localize(df_fine.index.tz)
|
||||
@@ -685,16 +692,53 @@ class TickerBase:
|
||||
new_index = _np.append([df_fine.index[0]], df_fine.index[df_fine["intervalID"].diff()>0])
|
||||
df_new.index = new_index
|
||||
|
||||
logger.debug("df_new:")
|
||||
logger.debug(df_new)
|
||||
|
||||
# Calibrate! Check whether 'df_fine' has different split-adjustment.
|
||||
# If different, then adjust to match 'df'
|
||||
# Calibrate!
|
||||
common_index = _np.intersect1d(df_block.index, df_new.index)
|
||||
if len(common_index) == 0:
|
||||
# Can't calibrate so don't attempt repair
|
||||
logger.warning(f"Can't calibrate {interval} block starting {start_d} so aborting repair")
|
||||
continue
|
||||
# First, attempt to calibrate the 'Adj Close' column. OK if cannot.
|
||||
# Only necessary for 1d interval, because the 1h data is not div-adjusted.
|
||||
if interval == '1d':
|
||||
df_new_calib = df_new[df_new.index.isin(common_index)]
|
||||
df_block_calib = df_block[df_block.index.isin(common_index)]
|
||||
f_tag = df_block_calib['Adj Close'] == tag
|
||||
if f_tag.any():
|
||||
div_adjusts = df_block_calib['Adj Close'] / df_block_calib['Close']
|
||||
# The loop below assumes each 1d repair is isolated, i.e. surrounded by
|
||||
# good data, which is the case most of the time.
|
||||
# But in case we are repairing a chunk of bad 1d data, back/forward-fill the
|
||||
# good div-adjustments - not perfect, but a good backup.
|
||||
div_adjusts[f_tag] = _np.nan
|
||||
div_adjusts = div_adjusts.fillna(method='bfill').fillna(method='ffill')
|
||||
for idx in _np.where(f_tag)[0]:
|
||||
dt = df_new_calib.index[idx]
|
||||
n = len(div_adjusts)
|
||||
if df_new.loc[dt, "Dividends"] != 0:
|
||||
if idx < n-1:
|
||||
# Easy, take div-adjustment from next-day
|
||||
div_adjusts[idx] = div_adjusts[idx+1]
|
||||
else:
|
||||
# Take previous-day div-adjustment and reverse today's adjustment
|
||||
div_adj = 1.0 - df_new_calib["Dividends"].iloc[idx] / df_new_calib['Close'].iloc[idx-1]
|
||||
div_adjusts[idx] = div_adjusts[idx-1] / div_adj
|
||||
else:
|
||||
if idx > 0:
|
||||
# Easy, take div-adjustment from previous-day
|
||||
div_adjusts[idx] = div_adjusts[idx-1]
|
||||
else:
|
||||
# Must take next-day div-adjustment
|
||||
div_adjusts[idx] = div_adjusts[idx+1]
|
||||
if df_new_calib["Dividends"].iloc[idx+1] != 0:
|
||||
div_adjusts[idx] *= 1.0 - df_new_calib["Dividends"].iloc[idx+1] / df_new_calib['Close'].iloc[idx]
|
||||
f_close_bad = df_block_calib['Close'] == tag
|
||||
df_new['Adj Close'] = df_block['Close'] * div_adjusts
|
||||
if f_close_bad.any():
|
||||
df_new.loc[f_close_bad, 'Adj Close'] = df_new['Close'][f_close_bad] * div_adjusts[f_close_bad]
|
||||
|
||||
# Check whether 'df_fine' has different split-adjustment.
|
||||
# If different, then adjust to match 'df'
|
||||
df_new_calib = df_new[df_new.index.isin(common_index)][price_cols].to_numpy()
|
||||
df_block_calib = df_block[df_block.index.isin(common_index)][price_cols].to_numpy()
|
||||
calib_filter = (df_block_calib != tag)
|
||||
@@ -716,7 +760,7 @@ class TickerBase:
|
||||
weights = _np.tile(weights, len(price_cols)) # 1D -> 2D
|
||||
weights = weights[calib_filter] # flatten
|
||||
ratio = _np.average(ratios, weights=weights)
|
||||
logger.debug(f"Price calibration ratio (raw) = {ratio}")
|
||||
logger.debug(f"Price calibration ratio (raw) = {ratio:6f}")
|
||||
ratio_rcp = round(1.0 / ratio, 1)
|
||||
ratio = round(ratio, 1)
|
||||
if ratio == 1 and ratio_rcp == 1:
|
||||
@@ -744,8 +788,7 @@ class TickerBase:
|
||||
# so probably no trading happened.
|
||||
no_fine_data_dts.append(idx)
|
||||
if len(no_fine_data_dts) > 0:
|
||||
logger.debug(f"Yahoo didn't return finer-grain data for these intervals:")
|
||||
logger.debug(no_fine_data_dts)
|
||||
logger.debug(f"Yahoo didn't return finer-grain data for these intervals: " + str(no_fine_data_dts))
|
||||
for idx in bad_dts:
|
||||
if not idx in df_new.index:
|
||||
# Yahoo didn't return finer-grain data for this interval,
|
||||
@@ -774,20 +817,32 @@ class TickerBase:
|
||||
df_v2.loc[idx, "Close"] = df_new_row["Close"]
|
||||
# Assume 'Adj Close' also corrupted, easier than detecting whether true
|
||||
df_v2.loc[idx, "Adj Close"] = df_new_row["Adj Close"]
|
||||
elif "Adj Close" in bad_fields:
|
||||
df_v2.loc[idx, "Adj Close"] = df_new_row["Adj Close"]
|
||||
if "Volume" in bad_fields:
|
||||
df_v2.loc[idx, "Volume"] = df_new_row["Volume"]
|
||||
df_v2.loc[idx, "Repaired?"] = True
|
||||
n_fixed += 1
|
||||
|
||||
logger.debug("df_v2:")
|
||||
logger.debug(df_v2)
|
||||
|
||||
return df_v2
|
||||
|
||||
@utils.log_indent_decorator
def _fix_unit_mixups(self, df, interval, tz_exchange, prepost, silent=False):
    # Run both 100x unit repairs in sequence: first the "sudden switch" repair
    # (_fix_unit_switch), then the "random outliers" repair
    # (_fix_unit_random_mixups) on the result of the first.
    # Returns whatever the second repair returns.
    after_switch_repair = self._fix_unit_switch(df, interval, tz_exchange)
    return self._fix_unit_random_mixups(after_switch_repair, interval, tz_exchange, prepost, silent)
|
||||
|
||||
@utils.log_indent_decorator
|
||||
def _fix_unit_random_mixups(self, df, interval, tz_exchange, prepost, silent=False):
|
||||
# Sometimes Yahoo returns few prices in cents/pence instead of $/£
|
||||
# I.e. 100x bigger
|
||||
# 2 ways this manifests:
|
||||
# - random 100x errors spread throughout table
|
||||
# - a sudden switch between $<->cents at some date
|
||||
# This function fixes the first.
|
||||
|
||||
# Easy to detect and fix, just look for outliers = ~100x local median
|
||||
logger = utils.get_yf_logger()
|
||||
|
||||
if df.shape[0] == 0:
|
||||
if not "Repaired?" in df.columns:
|
||||
@@ -795,14 +850,14 @@ class TickerBase:
|
||||
return df
|
||||
if df.shape[0] == 1:
|
||||
# Need multiple rows to confidently identify outliers
|
||||
logger.warning("Cannot check single-row table for 100x price errors")
|
||||
logger.warning("price-repair-100x: Cannot check single-row table for 100x price errors")
|
||||
if not "Repaired?" in df.columns:
|
||||
df["Repaired?"] = False
|
||||
return df
|
||||
|
||||
df2 = df.copy()
|
||||
|
||||
if df.index.tz is None:
|
||||
if df2.index.tz is None:
|
||||
df2.index = df2.index.tz_localize(tz_exchange)
|
||||
elif df2.index.tz != tz_exchange:
|
||||
df2.index = df2.index.tz_convert(tz_exchange)
|
||||
@@ -820,7 +875,7 @@ class TickerBase:
|
||||
else:
|
||||
df2_zeroes = None
|
||||
if df2.shape[0] <= 1:
|
||||
logger.warning("Insufficient good data for detecting 100x price errors")
|
||||
logger.warning("price-repair-100x: Insufficient good data for detecting 100x price errors")
|
||||
if not "Repaired?" in df.columns:
|
||||
df["Repaired?"] = False
|
||||
return df
|
||||
@@ -830,7 +885,7 @@ class TickerBase:
|
||||
ratio_rounded = (ratio / 20).round() * 20 # round ratio to nearest 20
|
||||
f = ratio_rounded == 100
|
||||
if not f.any():
|
||||
logger.info("No bad data (100x wrong) to repair")
|
||||
logger.info("price-repair-100x: No sporadic 100x errors")
|
||||
if not "Repaired?" in df.columns:
|
||||
df["Repaired?"] = False
|
||||
return df
|
||||
@@ -894,7 +949,7 @@ class TickerBase:
|
||||
if n_fixed_crudely > 0:
|
||||
report_msg += f"({n_fixed_crudely} crudely) "
|
||||
report_msg += f"in {interval} price data"
|
||||
logger.info('%s', report_msg)
|
||||
logger.info('price-repair-100x: ' + report_msg)
|
||||
|
||||
# Restore original values where repair failed
|
||||
f = df2_tagged
|
||||
@@ -911,11 +966,34 @@ class TickerBase:
|
||||
|
||||
return df2
|
||||
|
||||
@utils.log_indent_decorator
def _fix_unit_switch(self, df, interval, tz_exchange):
    # Yahoo sometimes returns a few prices in cents/pence instead of $/£,
    # i.e. 100x too big. This shows up in 2 ways:
    # - random 100x errors scattered through the table
    # - a sudden switch between $<->cents at some date
    # This function handles the second case.
    # Yahoo eventually fixes it, but that can take them 2 weeks.
    #
    # Detection reuses the 'bad split adjustment' algorithm with a ratio of
    # 100.0, but only when the data contains no stock splits.
    # Returns 'df' untouched when any split is present, otherwise the result
    # of _fix_prices_sudden_change().

    splits_present = (df['Stock Splits'].to_numpy() != 0.0).any()
    if not splits_present:
        return self._fix_prices_sudden_change(df, interval, tz_exchange, 100.0)

    utils.get_yf_logger().debug('price-repair-100x: Cannot check for chunked 100x errors because splits present')
    return df
|
||||
|
||||
@utils.log_indent_decorator
|
||||
def _fix_zeroes(self, df, interval, tz_exchange, prepost, silent=False):
|
||||
# Sometimes Yahoo returns prices=0 or NaN when trades occurred.
|
||||
# But most times when prices=0 or NaN returned is because no trades.
|
||||
# Impossible to distinguish, so only attempt repair if few or rare.
|
||||
|
||||
logger = utils.get_yf_logger()
|
||||
|
||||
if df.shape[0] == 0:
|
||||
if not "Repaired?" in df.columns:
|
||||
df["Repaired?"] = False
|
||||
@@ -948,17 +1026,26 @@ class TickerBase:
|
||||
f_change = df2["High"].to_numpy() != df2["Low"].to_numpy()
|
||||
f_vol_bad = (df2["Volume"]==0).to_numpy() & f_high_low_good & f_change
|
||||
|
||||
# If stock split occurred, then trading must have happened.
|
||||
# I should probably rename the function, because prices aren't zero ...
|
||||
if 'Stock Splits' in df2.columns:
|
||||
f_split = (df2['Stock Splits'] != 0.0).to_numpy()
|
||||
if f_split.any():
|
||||
f_change_expected_but_missing = f_split & ~f_change
|
||||
if f_change_expected_but_missing.any():
|
||||
f_prices_bad[f_change_expected_but_missing] = True
|
||||
|
||||
# Check whether worth attempting repair
|
||||
f_prices_bad = f_prices_bad.to_numpy()
|
||||
f_bad_rows = f_prices_bad.any(axis=1) | f_vol_bad
|
||||
if not f_bad_rows.any():
|
||||
logger.info("No bad data (price=0) to repair")
|
||||
logger.info("price-repair-missing: No price=0 errors to repair")
|
||||
if not "Repaired?" in df.columns:
|
||||
df["Repaired?"] = False
|
||||
return df
|
||||
if f_prices_bad.sum() == len(price_cols)*len(df2):
|
||||
# Need some good data to calibrate
|
||||
logger.warning("No good data for calibration so cannot fix price=0 bad data")
|
||||
logger.warning("price-repair-missing: No good data for calibration so cannot fix price=0 bad data")
|
||||
if not "Repaired?" in df.columns:
|
||||
df["Repaired?"] = False
|
||||
return df
|
||||
@@ -980,32 +1067,341 @@ class TickerBase:
|
||||
df2_tagged = df2[data_cols].to_numpy()==tag
|
||||
n_before = df2_tagged.sum()
|
||||
dts_tagged = df2.index[df2_tagged.any(axis=1)]
|
||||
df3 = self._reconstruct_intervals_batch(df2, interval, prepost, tag, silent)
|
||||
df3_tagged = df3[data_cols].to_numpy()==tag
|
||||
n_after = df3_tagged.sum()
|
||||
dts_not_repaired = df3.index[df3_tagged.any(axis=1)]
|
||||
df2 = self._reconstruct_intervals_batch(df2, interval, prepost, tag, silent)
|
||||
df2_tagged = df2[data_cols].to_numpy()==tag
|
||||
n_after = df2_tagged.sum()
|
||||
dts_not_repaired = df2.index[df2_tagged.any(axis=1)]
|
||||
n_fixed = n_before - n_after
|
||||
if not silent and n_fixed > 0:
|
||||
msg = f"{self.ticker}: fixed {n_fixed}/{n_before} value=0 errors in {interval} price data"
|
||||
if n_fixed < 4:
|
||||
dts_repaired = sorted(list(set(dts_tagged).difference(dts_not_repaired)))
|
||||
msg += f": {dts_repaired}"
|
||||
logger.info('%s', msg)
|
||||
logger.info('price-repair-missing: ' + msg)
|
||||
|
||||
if df2_reserve is not None:
|
||||
if not "Repaired?" in df2_reserve.columns:
|
||||
df2_reserve["Repaired?"] = False
|
||||
df3 = _pd.concat([df3, df2_reserve]).sort_index()
|
||||
df2 = _pd.concat([df2, df2_reserve]).sort_index()
|
||||
|
||||
# Restore original values where repair failed (i.e. remove tag values)
|
||||
f = df3[data_cols].to_numpy()==tag
|
||||
f = df2[data_cols].to_numpy()==tag
|
||||
for j in range(len(data_cols)):
|
||||
fj = f[:,j]
|
||||
if fj.any():
|
||||
c = data_cols[j]
|
||||
df3.loc[fj, c] = df.loc[fj, c]
|
||||
df2.loc[fj, c] = df.loc[fj, c]
|
||||
|
||||
return df2
|
||||
|
||||
@utils.log_indent_decorator
def _fix_missing_div_adjust(self, df, interval, tz_exchange):
    # Sometimes, if a dividend occurred today, then Yahoo has not adjusted historic data.
    # Easy to detect and correct BUT ONLY IF the data 'df' includes today's dividend.
    # E.g. if fetching historic prices before today's dividend, then cannot fix.
    #
    # Parameters:
    #   df          - price table; reads 'Close', 'Adj Close' and 'Dividends'
    #   interval    - interval string; only '1d', '1wk', '1mo', '3mo' are processed
    #   tz_exchange - timezone the working copy's index is localized/converted to
    #
    # Returns 'df' (None/empty/intraday: as given; otherwise sorted by index) when
    # there is nothing to check, else a tz-normalised copy in which 'Adj Close' has
    # been corrected if a missing adjustment was detected.

    if df is None or df.empty:
        return df
    if interval not in ('1d', '1wk', '1mo', '3mo'):
        # Only interday data is handled
        return df

    df = df.sort_index()

    f_div = (df["Dividends"] != 0.0).to_numpy()
    if not f_div.any():
        return df

    df2 = df.copy()
    if df2.index.tz is None:
        df2.index = df2.index.tz_localize(tz_exchange)
    elif df2.index.tz != tz_exchange:
        df2.index = df2.index.tz_convert(tz_exchange)

    div_indices = _np.where(f_div)[0]
    last_div_idx = div_indices[-1]
    if last_div_idx == 0:
        # Not enough data to recalculate the div-adjustment,
        # because need close day before
        return df

    # To determine if Yahoo messed up, analyse price data between the most recent
    # dividend and the previous dividend (or the table start if there is no other).
    start_idx = 0 if len(div_indices) == 1 else div_indices[-2]
    start_dt = df2.index[start_idx]
    # The window deliberately stops before the dividend row itself.
    f_no_adj = (df2['Close'] == df2['Adj Close']).to_numpy()[start_idx:last_div_idx]
    threshold_pct = 0.5
    Yahoo_failed = (_np.sum(f_no_adj) / len(f_no_adj)) > threshold_pct
    if not Yahoo_failed:
        return df2

    # Fix Yahoo
    last_div_dt = df2.index[last_div_idx]
    close_day_before = df2['Close'].iloc[last_div_idx-1]
    adj = 1.0 - df2['Dividends'].iloc[last_div_idx] / close_day_before

    # '.loc' label slices include BOTH end points, so step back 1 second to get an
    # exclusive bound.
    one_second = _datetime.timedelta(seconds=1)
    if interval == '1d':
        # Daily rows: the dividend row is the ex-dividend day. It was excluded from
        # the detection window above and must likewise not be scaled by its own
        # dividend, so end the corrected range just before it.
        end_dt = last_div_dt - one_second
    else:
        # NOTE(review): for multi-day intervals the dividend falls somewhere inside
        # the row's period; the original inclusive behaviour is kept - confirm.
        end_dt = last_div_dt

    df2.loc[start_dt:end_dt, 'Adj Close'] = adj * df2.loc[start_dt:end_dt, 'Close']
    # Older rows already carry earlier adjustments; apply the missing factor on top.
    df2.loc[:start_dt - one_second, 'Adj Close'] *= adj

    return df2
|
||||
|
||||
@utils.log_indent_decorator
def _fix_bad_stock_split(self, df, interval, tz_exchange):
    # The repair looks for BIG price changes between rows that closely match the
    # most recent stock split ratio - a sign that Yahoo failed to apply a new
    # stock split to old price data.
    #
    # One complication: sometimes Yahoo adjusts the old data twice, so the
    # direction of the reverse adjustment cannot be assumed - prices have to be
    # analysed to detect it. That analysis lives in _fix_prices_sudden_change().
    #
    # Returns 'df' unchanged for intraday intervals; 'df' sorted index-descending
    # when there is no split or no data after the latest split; otherwise the
    # result of _fix_prices_sudden_change().

    log = utils.get_yf_logger()

    if interval not in ('1d', '1wk', '1mo', '3mo'):
        return df

    # Newest row first, then locate the latest row that carries a split
    df = df.sort_index(ascending=False)
    is_split_row = df['Stock Splits'].to_numpy() != 0
    if not is_split_row.any():
        return df
    latest_split_dt = df.index[is_split_row].max()
    ratio = df.loc[latest_split_dt, 'Stock Splits']
    if latest_split_dt == df.index[0]:
        log.info("price-repair-split: Need 1+ day of price data after split to determine true price. Won't repair")
        return df

    log.debug(f'price-repair-split: Most recent split = {ratio:.4f} @ {latest_split_dt.date()}')

    return self._fix_prices_sudden_change(df, interval, tz_exchange, ratio, correct_volume=True)
|
||||
|
||||
@utils.log_indent_decorator
def _fix_prices_sudden_change(self, df, interval, tz_exchange, change, correct_volume=False):
    # Look for row-to-row price jumps of roughly 'change' (or 1/'change') and undo
    # them by rescaling the affected rows.
    #
    # Parameters:
    #   df             - price table; the code reads 'Open', 'Low', 'High', 'Close',
    #                    writes those plus 'Adj Close' and 'Repaired?', and also
    #                    'Volume' when 'correct_volume' is set
    #   interval       - interval string; '1d', '1wk', '1mo', '3mo' count as interday
    #   tz_exchange    - timezone the working copy's index is localized/converted to
    #   change         - the suspected price ratio to look for
    #   correct_volume - if True, 'Volume' is scaled by the inverse factor and finally
    #                    rounded and cast to int
    #
    # Returns 'df' re-sorted index-descending when no repair is made (every early
    # exit below), otherwise the repaired copy 'df2' (also index-descending).
    logger = utils.get_yf_logger()

    df = df.sort_index(ascending=False)
    split = change
    split_rcp = 1.0/split
    interday = interval in ['1d', '1wk', '1mo', '3mo']

    OHLC = ['Open', 'Low', 'High', 'Close']
    # NOTE(review): 'OHLCA' is not referenced again in this function.
    OHLCA = OHLC + ['Adj Close']

    # Do not attempt repair if the split is small,
    # could be mistaken for normal price variance
    if split > 0.8 and split < 1.25:
        logger.info("price-repair-split: Split ratio too close to 1. Won't repair")
        return df

    df2 = df.copy()
    if df2.index.tz is None:
        df2.index = df2.index.tz_localize(tz_exchange)
    elif df2.index.tz != tz_exchange:
        df2.index = df2.index.tz_convert(tz_exchange)
    n = df2.shape[0]

    # Debug-only scratch table: it is filled in below, but the only code that
    # prints it is currently commented out.
    if logger.isEnabledFor(logging.DEBUG):
        df_debug = df2.copy()
        df_debug = df_debug.drop(['Adj Close', 'Low', 'High', 'Volume', 'Dividends', 'Repaired?'], axis=1, errors='ignore')

    # Calculate daily price % change. To reduce effect of price volatility,
    # calculate change for each OHLC column and select value nearest 1.0.
    # Rows are index-descending, so row i is divided by row i-1 (the next-later row).
    # Zero prices are replaced by 1.0 first - presumably to avoid dividing by zero.
    _1d_change_x = _np.full((n, 4), 1.0)
    price_data = df2[OHLC].replace(0.0, 1.0).to_numpy()
    _1d_change_x[1:] = price_data[1:,] / price_data[:-1,]
    diff = _np.abs(_1d_change_x - 1.0)
    j_indices = _np.argmin(diff, axis=1)
    _1d_change_minx = _1d_change_x[_np.arange(n), j_indices]
    f_na = _np.isnan(_1d_change_minx)
    if f_na.any():
        # Possible if data was too old for reconstruction.
        _1d_change_minx[f_na] = 1.0
    if logger.isEnabledFor(logging.DEBUG):
        df_debug['1D change X'] = _1d_change_minx

    # If all 1D changes are closer to 1.0 than split, exit
    split_max = max(split, split_rcp)
    if _np.max(_1d_change_minx) < (split_max-1)*0.5+1 and _np.min(_1d_change_minx) > 1.0/((split_max-1)*0.5 +1):
        logger.info(f"price-repair-split: No bad splits detected")
        return df

    # Calculate the true price variance, i.e. remove effect of bad split-adjustments.
    # Key = ignore 1D changes outside of interquartile range
    q1, q3 = _np.percentile(_1d_change_minx, [25, 75])
    iqr = q3 - q1
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr
    f = (_1d_change_minx >= lower_bound) & (_1d_change_minx <= upper_bound)
    avg = _np.mean(_1d_change_minx[f])
    sd = _np.std(_1d_change_minx[f])
    # Now can calculate SD as % of mean
    sd_pct = sd / avg
    logger.debug(f"price-repair-split: Estimation of true 1D change stats: mean = {avg:.2f}, StdDev = {sd:.4f} ({sd_pct*100.0:.1f}% of mean)")

    # Only proceed if split adjustment far exceeds normal 1D changes
    largest_change_pct = 5*sd_pct
    if interday and interval != '1d':
        # Multi-day intervals get a 5x wider allowance
        largest_change_pct *= 5
    if (max(split, split_rcp) < 1.0+largest_change_pct):
        logger.info("price-repair-split: Split ratio too close to normal price volatility. Won't repair")
        # if logger.isEnabledFor(logging.DEBUG):
        #     logger.debug(f"price-repair-split: my workings:")
        #     logger.debug('\n' + str(df_debug))
        return df

    # Now can detect bad split adjustments
    # Set threshold to halfway between split ratio and largest expected normal price change
    # NOTE(review): this value of 'r' is overwritten further down before it is read.
    r = _1d_change_minx / split_rcp
    split_max = max(split, split_rcp)
    logger.debug(f"price-repair-split: split_max={split_max:.3f} largest_change_pct={largest_change_pct:.4f}")
    threshold = (split_max + 1.0+largest_change_pct) * 0.5
    logger.debug(f"price-repair-split: threshold={threshold:.3f}")

    if 'Repaired?' not in df2.columns:
        df2['Repaired?'] = False

    if interday and interval != '1d':
        # Yahoo creates multi-day intervals using potentially corrupt data, e.g.
        # the Close could be 100x Open. This means have to correct each OHLC column
        # individually
        correct_columns_individually = True
    else:
        correct_columns_individually = False

    if correct_columns_individually:
        # Per-column change ratios: 2D, one column per entry of OHLC
        _1d_change_x = _np.full((n, 4), 1.0)
        price_data = df2[OHLC].replace(0.0, 1.0).to_numpy()
        _1d_change_x[1:] = price_data[1:,] / price_data[:-1,]
    else:
        # Single change ratio per row: 1D
        _1d_change_x = _1d_change_minx

    r = _1d_change_x / split_rcp
    # f1 flags changes below 1/threshold, f2 flags changes above threshold
    f1 = _1d_change_x < 1.0/threshold
    f2 = _1d_change_x > threshold
    f = f1 | f2
    if logger.isEnabledFor(logging.DEBUG):
        if not correct_columns_individually:
            df_debug['r'] = r
            df_debug['f1'] = f1
            df_debug['f2'] = f2
        else:
            for j in range(len(OHLC)):
                c = OHLC[j]
                df_debug[c+'_r'] = r[:,j]
                df_debug[c+'_f1'] = f1[:,j]
                df_debug[c+'_f2'] = f2[:,j]

    if not f.any():
        logger.info('price-repair-split: No bad split adjustments detected')
        return df

    def map_signals_to_ranges(f, f1):
        # Pair up successive True positions of 'f': positions 0&1, 2&3, ... each
        # become a half-open row range (start, end, adj). An unpaired final True
        # position opens a range running to the end of the table.
        # 'adj' names the multiplier to apply ('split' or '1.0/split'), chosen from
        # 'f1' at the range start and from whether split > 1.
        true_indices = _np.where(f)[0]
        ranges = []
        for i in range(len(true_indices) - 1):
            if i % 2 == 0:
                if split > 1.0:
                    adj = 'split' if f1[true_indices[i]] else '1.0/split'
                else:
                    adj = '1.0/split' if f1[true_indices[i]] else 'split'
                ranges.append((true_indices[i], true_indices[i+1], adj))
        if len(true_indices) % 2 != 0:
            if split > 1.0:
                adj = 'split' if f1[true_indices[-1]] else '1.0/split'
            else:
                adj = '1.0/split' if f1[true_indices[-1]] else 'split'
            ranges.append((true_indices[-1], len(f), adj))
        return ranges

    if correct_columns_individually:
        f_corrected = _np.full(n, False)
        if correct_volume:
            # If Open or Close is repaired but not both,
            # then this means the interval has a mix of correct
            # and errors. A problem for correcting Volume,
            # so use a heuristic:
            # - if both Open & Close were Nx bad => Volume is Nx bad
            # - if only one of Open & Close are Nx bad => Volume is 0.5*Nx bad
            f_open_fixed = _np.full(n, False)
            f_close_fixed = _np.full(n, False)
        for j in range(len(OHLC)):
            c = OHLC[j]
            ranges = map_signals_to_ranges(f[:,j], f1[:,j])

            # NOTE: from here on 'r' names a (start, end, adj) tuple, replacing the
            # ratio array assigned above.
            for r in ranges:
                if r[2] == 'split':
                    m = split ; m_rcp = split_rcp
                else:
                    m = split_rcp ; m_rcp = split
                if interday:
                    logger.debug(f"price-repair-split: col={c} range=[{df2.index[r[0]].date()}:{df2.index[r[1]-1].date()}] m={m:.4f}")
                else:
                    logger.debug(f"price-repair-split: col={c} range=[{df2.index[r[0]]}:{df2.index[r[1]-1]}] m={m:.4f}")
                df2.iloc[r[0]:r[1], df2.columns.get_loc(c)] *= m
                if c == 'Close':
                    # 'Adj Close' follows the 'Close' correction
                    df2.iloc[r[0]:r[1], df2.columns.get_loc('Adj Close')] *= m
                if correct_volume:
                    if c == 'Open':
                        f_open_fixed[r[0]:r[1]] = True
                    elif c == 'Close':
                        f_close_fixed[r[0]:r[1]] = True
                f_corrected[r[0]:r[1]] = True

        if correct_volume:
            f_open_and_closed_fixed = f_open_fixed & f_close_fixed
            f_open_xor_closed_fixed = _np.logical_xor(f_open_fixed, f_close_fixed)
            # NOTE(review): 'm_rcp' here is whatever the last range processed in the
            # loop above assigned - confirm this is intended when ranges were
            # corrected in different directions.
            if f_open_and_closed_fixed.any():
                df2.loc[f_open_and_closed_fixed, "Volume"] *= m_rcp
            if f_open_xor_closed_fixed.any():
                df2.loc[f_open_xor_closed_fixed, "Volume"] *= 0.5*m_rcp

        df2.loc[f_corrected, 'Repaired?'] = True

    else:
        ranges = map_signals_to_ranges(f, f1)
        # NOTE: 'r' is reused as the loop variable, replacing the ratio array above.
        for r in ranges:
            if r[2] == 'split':
                m = split ; m_rcp = split_rcp
            else:
                m = split_rcp ; m_rcp = split
            logger.debug(f"price-repair-split: range={r} m={m}")
            for c in ['Open', 'High', 'Low', 'Close', 'Adj Close']:
                df2.iloc[r[0]:r[1], df2.columns.get_loc(c)] *= m
            if correct_volume:
                df2.iloc[r[0]:r[1], df2.columns.get_loc("Volume")] *= m_rcp
            df2.iloc[r[0]:r[1], df2.columns.get_loc('Repaired?')] = True
            if r[0] == r[1]-1:
                # Range covers a single row
                if interday:
                    msg = f"price-repair-split: Corrected bad split adjustment on interval {df2.index[r[0]].date()}"
                else:
                    msg = f"price-repair-split: Corrected bad split adjustment on interval {df2.index[r[0]]}"
            else:
                # Note: df2 sorted with index descending
                start = df2.index[r[1]-1]
                end = df2.index[r[0]]
                if interday:
                    msg = f"price-repair-split: Corrected bad split adjustment across intervals {start.date()} -> {end.date()} (inclusive)"
                else:
                    msg = f"price-repair-split: Corrected bad split adjustment across intervals {start} -> {end} (inclusive)"
            logger.info(msg)

    if correct_volume:
        # Scaling produced fractional volumes; store whole numbers again
        df2['Volume'] = df2['Volume'].round(0).astype('int')

    # if logger.isEnabledFor(logging.DEBUG):
    #     logger.debug(f"price-repair-split: my workings:")
    #     logger.debug('\n' + str(df_debug))

    return df2
|
||||
|
||||
return df3
|
||||
|
||||
def _get_ticker_tz(self, proxy, timeout):
|
||||
if self._tz is not None:
|
||||
@@ -1030,9 +1426,12 @@ class TickerBase:
|
||||
self._tz = tz
|
||||
return tz
|
||||
|
||||
@utils.log_indent_decorator
|
||||
def _fetch_ticker_tz(self, proxy, timeout):
|
||||
# Query Yahoo for fast price data just to get returned timezone
|
||||
|
||||
logger = utils.get_yf_logger()
|
||||
|
||||
params = {"range": "1d", "interval": "1d"}
|
||||
|
||||
# Getting data from json
|
||||
@@ -1184,7 +1583,7 @@ class TickerBase:
|
||||
return dict_data
|
||||
return data
|
||||
|
||||
def get_income_stmt(self, proxy=None, as_dict=False, pretty=False, freq="yearly", legacy=False):
|
||||
def get_income_stmt(self, proxy=None, as_dict=False, pretty=False, freq="yearly"):
|
||||
"""
|
||||
:Parameters:
|
||||
as_dict: bool
|
||||
@@ -1196,19 +1595,13 @@ class TickerBase:
|
||||
freq: str
|
||||
"yearly" or "quarterly"
|
||||
Default is "yearly"
|
||||
legacy: bool
|
||||
Return old financials tables. Useful for when new tables not available
|
||||
Default is False
|
||||
proxy: str
|
||||
Optional. Proxy server URL scheme
|
||||
Default is None
|
||||
"""
|
||||
self._fundamentals.proxy = proxy
|
||||
|
||||
if legacy:
|
||||
data = self._fundamentals.financials.get_income_scrape(freq=freq, proxy=proxy)
|
||||
else:
|
||||
data = self._fundamentals.financials.get_income_time_series(freq=freq, proxy=proxy)
|
||||
data = self._fundamentals.financials.get_income_time_series(freq=freq, proxy=proxy)
|
||||
|
||||
if pretty:
|
||||
data = data.copy()
|
||||
@@ -1217,13 +1610,13 @@ class TickerBase:
|
||||
return data.to_dict()
|
||||
return data
|
||||
|
||||
def get_incomestmt(self, proxy=None, as_dict=False, pretty=False, freq="yearly", legacy=False):
|
||||
return self.get_income_stmt(proxy, as_dict, pretty, freq, legacy)
|
||||
def get_incomestmt(self, proxy=None, as_dict=False, pretty=False, freq="yearly"):
|
||||
return self.get_income_stmt(proxy, as_dict, pretty, freq)
|
||||
|
||||
def get_financials(self, proxy=None, as_dict=False, pretty=False, freq="yearly", legacy=False):
|
||||
return self.get_income_stmt(proxy, as_dict, pretty, freq, legacy)
|
||||
def get_financials(self, proxy=None, as_dict=False, pretty=False, freq="yearly"):
|
||||
return self.get_income_stmt(proxy, as_dict, pretty, freq)
|
||||
|
||||
def get_balance_sheet(self, proxy=None, as_dict=False, pretty=False, freq="yearly", legacy=False):
|
||||
def get_balance_sheet(self, proxy=None, as_dict=False, pretty=False, freq="yearly"):
|
||||
"""
|
||||
:Parameters:
|
||||
as_dict: bool
|
||||
@@ -1235,19 +1628,13 @@ class TickerBase:
|
||||
freq: str
|
||||
"yearly" or "quarterly"
|
||||
Default is "yearly"
|
||||
legacy: bool
|
||||
Return old financials tables. Useful for when new tables not available
|
||||
Default is False
|
||||
proxy: str
|
||||
Optional. Proxy server URL scheme
|
||||
Default is None
|
||||
"""
|
||||
self._fundamentals.proxy = proxy
|
||||
|
||||
if legacy:
|
||||
data = self._fundamentals.financials.get_balance_sheet_scrape(freq=freq, proxy=proxy)
|
||||
else:
|
||||
data = self._fundamentals.financials.get_balance_sheet_time_series(freq=freq, proxy=proxy)
|
||||
data = self._fundamentals.financials.get_balance_sheet_time_series(freq=freq, proxy=proxy)
|
||||
|
||||
if pretty:
|
||||
data = data.copy()
|
||||
@@ -1256,10 +1643,10 @@ class TickerBase:
|
||||
return data.to_dict()
|
||||
return data
|
||||
|
||||
def get_balancesheet(self, proxy=None, as_dict=False, pretty=False, freq="yearly", legacy=False):
|
||||
return self.get_balance_sheet(proxy, as_dict, pretty, freq, legacy)
|
||||
def get_balancesheet(self, proxy=None, as_dict=False, pretty=False, freq="yearly"):
|
||||
return self.get_balance_sheet(proxy, as_dict, pretty, freq)
|
||||
|
||||
def get_cash_flow(self, proxy=None, as_dict=False, pretty=False, freq="yearly", legacy=False):
|
||||
def get_cash_flow(self, proxy=None, as_dict=False, pretty=False, freq="yearly"):
|
||||
"""
|
||||
:Parameters:
|
||||
as_dict: bool
|
||||
@@ -1271,19 +1658,13 @@ class TickerBase:
|
||||
freq: str
|
||||
"yearly" or "quarterly"
|
||||
Default is "yearly"
|
||||
legacy: bool
|
||||
Return old financials tables. Useful for when new tables not available
|
||||
Default is False
|
||||
proxy: str
|
||||
Optional. Proxy server URL scheme
|
||||
Default is None
|
||||
"""
|
||||
self._fundamentals.proxy = proxy
|
||||
|
||||
if legacy:
|
||||
data = self._fundamentals.financials.get_cash_flow_scrape(freq=freq, proxy=proxy)
|
||||
else:
|
||||
data = self._fundamentals.financials.get_cash_flow_time_series(freq=freq, proxy=proxy)
|
||||
data = self._fundamentals.financials.get_cash_flow_time_series(freq=freq, proxy=proxy)
|
||||
|
||||
if pretty:
|
||||
data = data.copy()
|
||||
@@ -1292,8 +1673,8 @@ class TickerBase:
|
||||
return data.to_dict()
|
||||
return data
|
||||
|
||||
def get_cashflow(self, proxy=None, as_dict=False, pretty=False, freq="yearly", legacy=False):
|
||||
return self.get_cash_flow(proxy, as_dict, pretty, freq, legacy)
|
||||
def get_cashflow(self, proxy=None, as_dict=False, pretty=False, freq="yearly"):
|
||||
return self.get_cash_flow(proxy, as_dict, pretty, freq)
|
||||
|
||||
def get_dividends(self, proxy=None):
|
||||
if self._history is None:
|
||||
@@ -1337,7 +1718,10 @@ class TickerBase:
|
||||
return data.to_dict()
|
||||
return data
|
||||
|
||||
@utils.log_indent_decorator
|
||||
def get_shares_full(self, start=None, end=None, proxy=None):
|
||||
logger = utils.get_yf_logger()
|
||||
|
||||
# Process dates
|
||||
tz = self._get_ticker_tz(proxy=None, timeout=10)
|
||||
dt_now = _pd.Timestamp.utcnow().tz_convert(tz)
|
||||
@@ -1445,6 +1829,7 @@ class TickerBase:
|
||||
self._news = data.get("news", [])
|
||||
return self._news
|
||||
|
||||
@utils.log_indent_decorator
|
||||
def get_earnings_dates(self, limit=12, proxy=None) -> Optional[pd.DataFrame]:
|
||||
"""
|
||||
Get earning dates (future and historic)
|
||||
@@ -1458,6 +1843,8 @@ class TickerBase:
|
||||
if self._earnings_dates and limit in self._earnings_dates:
|
||||
return self._earnings_dates[limit]
|
||||
|
||||
logger = utils.get_yf_logger()
|
||||
|
||||
page_size = min(limit, 100) # YF caps at 100, don't go higher
|
||||
page_offset = 0
|
||||
dates = None
|
||||
|
||||
8
yfinance/const.py
Normal file
8
yfinance/const.py
Normal file
@@ -0,0 +1,8 @@
|
||||
|
||||
# Lists of field-name strings, keyed by statement type. The entries
# ('financials', 'balance-sheet', 'cash-flow') are filled in by the
# assignments that follow.
fundamentals_keys = {}
|
||||
|
||||
fundamentals_keys['financials'] = ["TaxEffectOfUnusualItems","TaxRateForCalcs","NormalizedEBITDA","NormalizedDilutedEPS","NormalizedBasicEPS","TotalUnusualItems","TotalUnusualItemsExcludingGoodwill","NetIncomeFromContinuingOperationNetMinorityInterest","ReconciledDepreciation","ReconciledCostOfRevenue","EBITDA","EBIT","NetInterestIncome","InterestExpense","InterestIncome","ContinuingAndDiscontinuedDilutedEPS","ContinuingAndDiscontinuedBasicEPS","NormalizedIncome","NetIncomeFromContinuingAndDiscontinuedOperation","TotalExpenses","RentExpenseSupplemental","ReportedNormalizedDilutedEPS","ReportedNormalizedBasicEPS","TotalOperatingIncomeAsReported","DividendPerShare","DilutedAverageShares","BasicAverageShares","DilutedEPS","DilutedEPSOtherGainsLosses","TaxLossCarryforwardDilutedEPS","DilutedAccountingChange","DilutedExtraordinary","DilutedDiscontinuousOperations","DilutedContinuousOperations","BasicEPS","BasicEPSOtherGainsLosses","TaxLossCarryforwardBasicEPS","BasicAccountingChange","BasicExtraordinary","BasicDiscontinuousOperations","BasicContinuousOperations","DilutedNIAvailtoComStockholders","AverageDilutionEarnings","NetIncomeCommonStockholders","OtherunderPreferredStockDividend","PreferredStockDividends","NetIncome","MinorityInterests","NetIncomeIncludingNoncontrollingInterests","NetIncomeFromTaxLossCarryforward","NetIncomeExtraordinary","NetIncomeDiscontinuousOperations","NetIncomeContinuousOperations","EarningsFromEquityInterestNetOfTax","TaxProvision","PretaxIncome","OtherIncomeExpense","OtherNonOperatingIncomeExpenses","SpecialIncomeCharges","GainOnSaleOfPPE","GainOnSaleOfBusiness","OtherSpecialCharges","WriteOff","ImpairmentOfCapitalAssets","RestructuringAndMergernAcquisition","SecuritiesAmortization","EarningsFromEquityInterest","GainOnSaleOfSecurity","NetNonOperatingInterestIncomeExpense","TotalOtherFinanceCost","InterestExpenseNonOperating","InterestIncomeNonOperating","OperatingIncome","OperatingExpense","OtherOperatingExpenses","OtherTaxes","ProvisionForD
oubtfulAccounts","DepreciationAmortizationDepletionIncomeStatement","DepletionIncomeStatement","DepreciationAndAmortizationInIncomeStatement","Amortization","AmortizationOfIntangiblesIncomeStatement","DepreciationIncomeStatement","ResearchAndDevelopment","SellingGeneralAndAdministration","SellingAndMarketingExpense","GeneralAndAdministrativeExpense","OtherGandA","InsuranceAndClaims","RentAndLandingFees","SalariesAndWages","GrossProfit","CostOfRevenue","TotalRevenue","ExciseTaxes","OperatingRevenue"]
|
||||
|
||||
fundamentals_keys['balance-sheet'] = ["TreasurySharesNumber","PreferredSharesNumber","OrdinarySharesNumber","ShareIssued","NetDebt","TotalDebt","TangibleBookValue","InvestedCapital","WorkingCapital","NetTangibleAssets","CapitalLeaseObligations","CommonStockEquity","PreferredStockEquity","TotalCapitalization","TotalEquityGrossMinorityInterest","MinorityInterest","StockholdersEquity","OtherEquityInterest","GainsLossesNotAffectingRetainedEarnings","OtherEquityAdjustments","FixedAssetsRevaluationReserve","ForeignCurrencyTranslationAdjustments","MinimumPensionLiabilities","UnrealizedGainLoss","TreasuryStock","RetainedEarnings","AdditionalPaidInCapital","CapitalStock","OtherCapitalStock","CommonStock","PreferredStock","TotalPartnershipCapital","GeneralPartnershipCapital","LimitedPartnershipCapital","TotalLiabilitiesNetMinorityInterest","TotalNonCurrentLiabilitiesNetMinorityInterest","OtherNonCurrentLiabilities","LiabilitiesHeldforSaleNonCurrent","RestrictedCommonStock","PreferredSecuritiesOutsideStockEquity","DerivativeProductLiabilities","EmployeeBenefits","NonCurrentPensionAndOtherPostretirementBenefitPlans","NonCurrentAccruedExpenses","DuetoRelatedPartiesNonCurrent","TradeandOtherPayablesNonCurrent","NonCurrentDeferredLiabilities","NonCurrentDeferredRevenue","NonCurrentDeferredTaxesLiabilities","LongTermDebtAndCapitalLeaseObligation","LongTermCapitalLeaseObligation","LongTermDebt","LongTermProvisions","CurrentLiabilities","OtherCurrentLiabilities","CurrentDeferredLiabilities","CurrentDeferredRevenue","CurrentDeferredTaxesLiabilities","CurrentDebtAndCapitalLeaseObligation","CurrentCapitalLeaseObligation","CurrentDebt","OtherCurrentBorrowings","LineOfCredit","CommercialPaper","CurrentNotesPayable","PensionandOtherPostRetirementBenefitPlansCurrent","CurrentProvisions","PayablesAndAccruedExpenses","CurrentAccruedExpenses","InterestPayable","Payables","OtherPayable","DuetoRelatedPartiesCurrent","DividendsPayable","TotalTaxPayable","IncomeTaxPayable","AccountsPayable","Total
Assets","TotalNonCurrentAssets","OtherNonCurrentAssets","DefinedPensionBenefit","NonCurrentPrepaidAssets","NonCurrentDeferredAssets","NonCurrentDeferredTaxesAssets","DuefromRelatedPartiesNonCurrent","NonCurrentNoteReceivables","NonCurrentAccountsReceivable","FinancialAssets","InvestmentsAndAdvances","OtherInvestments","InvestmentinFinancialAssets","HeldToMaturitySecurities","AvailableForSaleSecurities","FinancialAssetsDesignatedasFairValueThroughProfitorLossTotal","TradingSecurities","LongTermEquityInvestment","InvestmentsinJointVenturesatCost","InvestmentsInOtherVenturesUnderEquityMethod","InvestmentsinAssociatesatCost","InvestmentsinSubsidiariesatCost","InvestmentProperties","GoodwillAndOtherIntangibleAssets","OtherIntangibleAssets","Goodwill","NetPPE","AccumulatedDepreciation","GrossPPE","Leases","ConstructionInProgress","OtherProperties","MachineryFurnitureEquipment","BuildingsAndImprovements","LandAndImprovements","Properties","CurrentAssets","OtherCurrentAssets","HedgingAssetsCurrent","AssetsHeldForSaleCurrent","CurrentDeferredAssets","CurrentDeferredTaxesAssets","RestrictedCash","PrepaidAssets","Inventory","InventoriesAdjustmentsAllowances","OtherInventories","FinishedGoods","WorkInProcess","RawMaterials","Receivables","ReceivablesAdjustmentsAllowances","OtherReceivables","DuefromRelatedPartiesCurrent","TaxesReceivable","AccruedInterestReceivable","NotesReceivable","LoansReceivable","AccountsReceivable","AllowanceForDoubtfulAccountsReceivable","GrossAccountsReceivable","CashCashEquivalentsAndShortTermInvestments","OtherShortTermInvestments","CashAndCashEquivalents","CashEquivalents","CashFinancial"]
|
||||
|
||||
fundamentals_keys['cash-flow'] = ["ForeignSales","DomesticSales","AdjustedGeographySegmentData","FreeCashFlow","RepurchaseOfCapitalStock","RepaymentOfDebt","IssuanceOfDebt","IssuanceOfCapitalStock","CapitalExpenditure","InterestPaidSupplementalData","IncomeTaxPaidSupplementalData","EndCashPosition","OtherCashAdjustmentOutsideChangeinCash","BeginningCashPosition","EffectOfExchangeRateChanges","ChangesInCash","OtherCashAdjustmentInsideChangeinCash","CashFlowFromDiscontinuedOperation","FinancingCashFlow","CashFromDiscontinuedFinancingActivities","CashFlowFromContinuingFinancingActivities","NetOtherFinancingCharges","InterestPaidCFF","ProceedsFromStockOptionExercised","CashDividendsPaid","PreferredStockDividendPaid","CommonStockDividendPaid","NetPreferredStockIssuance","PreferredStockPayments","PreferredStockIssuance","NetCommonStockIssuance","CommonStockPayments","CommonStockIssuance","NetIssuancePaymentsOfDebt","NetShortTermDebtIssuance","ShortTermDebtPayments","ShortTermDebtIssuance","NetLongTermDebtIssuance","LongTermDebtPayments","LongTermDebtIssuance","InvestingCashFlow","CashFromDiscontinuedInvestingActivities","CashFlowFromContinuingInvestingActivities","NetOtherInvestingChanges","InterestReceivedCFI","DividendsReceivedCFI","NetInvestmentPurchaseAndSale","SaleOfInvestment","PurchaseOfInvestment","NetInvestmentPropertiesPurchaseAndSale","SaleOfInvestmentProperties","PurchaseOfInvestmentProperties","NetBusinessPurchaseAndSale","SaleOfBusiness","PurchaseOfBusiness","NetIntangiblesPurchaseAndSale","SaleOfIntangibles","PurchaseOfIntangibles","NetPPEPurchaseAndSale","SaleOfPPE","PurchaseOfPPE","CapitalExpenditureReported","OperatingCashFlow","CashFromDiscontinuedOperatingActivities","CashFlowFromContinuingOperatingActivities","TaxesRefundPaid","InterestReceivedCFO","InterestPaidCFO","DividendReceivedCFO","DividendPaidCFO","ChangeInWorkingCapital","ChangeInOtherWorkingCapital","ChangeInOtherCurrentLiabilities","ChangeInOtherCurrentAssets","ChangeInPayablesAndAccruedExp
ense","ChangeInAccruedExpense","ChangeInInterestPayable","ChangeInPayable","ChangeInDividendPayable","ChangeInAccountPayable","ChangeInTaxPayable","ChangeInIncomeTaxPayable","ChangeInPrepaidAssets","ChangeInInventory","ChangeInReceivables","ChangesInAccountReceivables","OtherNonCashItems","ExcessTaxBenefitFromStockBasedCompensation","StockBasedCompensation","UnrealizedGainLossOnInvestmentSecurities","ProvisionandWriteOffofAssets","AssetImpairmentCharge","AmortizationOfSecurities","DeferredTax","DeferredIncomeTax","DepreciationAmortizationDepletion","Depletion","DepreciationAndAmortization","AmortizationCashFlow","AmortizationOfIntangibles","Depreciation","OperatingGainsLosses","PensionAndEmployeeBenefitExpense","EarningsLossesFromEquityInvestments","GainLossOnInvestmentSecurities","NetForeignCurrencyExchangeGainLoss","GainLossOnSaleOfPPE","GainLossOnSaleOfBusiness","NetIncomeFromContinuingOperations","CashFlowsfromusedinOperatingActivitiesDirect","TaxesRefundPaidDirect","InterestReceivedDirect","InterestPaidDirect","DividendsReceivedDirect","DividendsPaidDirect","ClassesofCashPayments","OtherCashPaymentsfromOperatingActivities","PaymentsonBehalfofEmployees","PaymentstoSuppliersforGoodsandServices","ClassesofCashReceiptsfromOperatingActivities","OtherCashReceiptsfromOperatingActivities","ReceiptsfromGovernmentGrants","ReceiptsfromCustomers"]
|
||||
260
yfinance/data.py
260
yfinance/data.py
@@ -2,36 +2,18 @@ import functools
|
||||
from functools import lru_cache
|
||||
|
||||
import logging
|
||||
import hashlib
|
||||
from base64 import b64decode
|
||||
usePycryptodome = False # slightly faster
|
||||
# usePycryptodome = True
|
||||
if usePycryptodome:
|
||||
from Crypto.Cipher import AES
|
||||
from Crypto.Util.Padding import unpad
|
||||
else:
|
||||
from cryptography.hazmat.primitives import padding
|
||||
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
|
||||
|
||||
import requests as requests
|
||||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
import random
|
||||
import time
|
||||
|
||||
from frozendict import frozendict
|
||||
|
||||
try:
|
||||
import ujson as json
|
||||
except ImportError:
|
||||
import json as json
|
||||
|
||||
from . import utils
|
||||
|
||||
cache_maxsize = 64
|
||||
|
||||
logger = utils.get_yf_logger()
|
||||
|
||||
|
||||
def lru_cache_freezeargs(func):
|
||||
"""
|
||||
@@ -54,127 +36,6 @@ def lru_cache_freezeargs(func):
|
||||
return wrapped
|
||||
|
||||
|
||||
def _extract_extra_keys_from_stores(data):
|
||||
new_keys = [k for k in data.keys() if k not in ["context", "plugins"]]
|
||||
new_keys_values = set([data[k] for k in new_keys])
|
||||
|
||||
# Maybe multiple keys have same value - keep one of each
|
||||
new_keys_uniq = []
|
||||
new_keys_uniq_values = set()
|
||||
for k in new_keys:
|
||||
v = data[k]
|
||||
if not v in new_keys_uniq_values:
|
||||
new_keys_uniq.append(k)
|
||||
new_keys_uniq_values.add(v)
|
||||
|
||||
return [data[k] for k in new_keys_uniq]
|
||||
|
||||
|
||||
def decrypt_cryptojs_aes_stores(data, keys=None):
    """Decrypt the encrypted ``stores`` blob found in *data*.

    The blob at ``data['context']['dispatcher']['stores']`` is base64 text
    whose decoded form is ``b"Salted__"`` + 8 salt bytes + ciphertext. The
    AES key and IV are derived from a password with the OpenSSL EVP KDF
    (MD5, one iteration) and the ciphertext is decrypted with AES-CBC and
    PKCS7 unpadding.

    Password selection:
      * if *data* contains both ``"_cs"`` and ``"_cr"``, the password is
        derived from them with PBKDF2-HMAC-SHA1 and only that one is used;
      * otherwise each entry of *keys* is tried in order until one decrypts.

    Args:
        data (dict): Decoded ``root.App.main`` JSON.
        keys (list, optional): Candidate passwords.

    Returns:
        The JSON-decoded plaintext.

    Raises:
        TypeError: *keys* is given but is not a list.
        AssertionError: the decoded blob lacks the ``Salted__`` header.
        Exception: no password could decrypt the blob.
    """
    encrypted_stores = data['context']['dispatcher']['stores']

    password = None
    if keys is not None:
        if not isinstance(keys, list):
            raise TypeError("'keys' must be list")
        candidate_passwords = keys
    else:
        candidate_passwords = []

    if "_cs" in data and "_cr" in data:
        _cs = data["_cs"]
        _cr = data["_cr"]
        _cr = b"".join(int.to_bytes(i, length=4, byteorder="big", signed=True) for i in json.loads(_cr)["words"])
        password = hashlib.pbkdf2_hmac("sha1", _cs.encode("utf8"), _cr, 1, dklen=32).hex()

    encrypted_stores = b64decode(encrypted_stores)
    # Explicit check (not an `assert` statement) so it still runs under `python -O`;
    # the exception type is kept for existing callers.
    if encrypted_stores[0:8] != b"Salted__":
        raise AssertionError("Encrypted stores do not start with the 'Salted__' header")
    salt = encrypted_stores[8:16]
    encrypted_stores = encrypted_stores[16:]

    def _EVPKDF(password, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="md5") -> tuple:
        """OpenSSL EVP Key Derivation Function
        Args:
            password (Union[str, bytes, bytearray]): Password to generate key from.
            salt (Union[bytes, bytearray]): Salt to use.
            keySize (int, optional): Output key length in bytes. Defaults to 32.
            ivSize (int, optional): Output Initialization Vector (IV) length in bytes. Defaults to 16.
            iterations (int, optional): Number of iterations to perform. Defaults to 1.
            hashAlgorithm (str, optional): Hash algorithm to use for the KDF. Defaults to 'md5'.
        Returns:
            key, iv: Derived key and Initialization Vector (IV) bytes.

        Taken from: https://gist.github.com/rafiibrahim8/0cd0f8c46896cafef6486cb1a50a16d3
        OpenSSL original code: https://github.com/openssl/openssl/blob/master/crypto/evp/evp_key.c#L78
        """
        if iterations < 1:
            raise ValueError("Iterations can not be less than 1.")

        if isinstance(password, str):
            password = password.encode("utf-8")

        final_length = keySize + ivSize
        key_iv = b""
        block = None

        # Each round hashes (previous block + password + salt); rounds are
        # appended until enough bytes exist for both the key and the IV.
        while len(key_iv) < final_length:
            hasher = hashlib.new(hashAlgorithm)
            if block:
                hasher.update(block)
            hasher.update(password)
            hasher.update(salt)
            block = hasher.digest()
            for _ in range(1, iterations):
                block = hashlib.new(hashAlgorithm, block).digest()
            key_iv += block

        return key_iv[:keySize], key_iv[keySize:final_length]

    def _decrypt(ciphertext, key, iv):
        # AES-CBC decrypt, strip PKCS7 padding, decode as UTF-8 text.
        if usePycryptodome:
            cipher = AES.new(key, AES.MODE_CBC, iv=iv)
            plaintext = cipher.decrypt(ciphertext)
            plaintext = unpad(plaintext, 16, style="pkcs7")
        else:
            cipher = Cipher(algorithms.AES(key), modes.CBC(iv))
            decryptor = cipher.decryptor()
            plaintext = decryptor.update(ciphertext) + decryptor.finalize()
            unpadder = padding.PKCS7(128).unpadder()
            plaintext = unpadder.update(plaintext) + unpadder.finalize()
        return plaintext.decode("utf-8")

    if password is not None:
        try:
            key, iv = _EVPKDF(password, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="md5")
        except Exception as err:
            raise Exception("yfinance failed to decrypt Yahoo data response") from err
        plaintext = _decrypt(encrypted_stores, key, iv)
    else:
        for candidate in candidate_passwords:
            try:
                key, iv = _EVPKDF(candidate, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="md5")
                plaintext = _decrypt(encrypted_stores, key, iv)
            except Exception:
                # A wrong password normally surfaces as a padding or decode
                # error; move on to the next candidate.
                continue
            break
        else:
            raise Exception("yfinance failed to decrypt Yahoo data response")

    decoded_stores = json.loads(plaintext)
    return decoded_stores
|
||||
|
||||
|
||||
_SCRAPE_URL_ = 'https://finance.yahoo.com/quote'
|
||||
|
||||
|
||||
class TickerData:
|
||||
"""
|
||||
Have one place to retrieve data from Yahoo API in order to ease caching and speed up operations
|
||||
@@ -213,124 +74,3 @@ class TickerData:
|
||||
response = self.get(url, user_agent_headers=user_agent_headers, params=params, proxy=proxy, timeout=timeout)
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
|
||||
def _get_decryption_keys_from_yahoo_js(self, soup):
|
||||
result = None
|
||||
|
||||
key_count = 4
|
||||
re_script = soup.find("script", string=re.compile("root.App.main")).text
|
||||
re_data = json.loads(re.search("root.App.main\s+=\s+(\{.*\})", re_script).group(1))
|
||||
re_data.pop("context", None)
|
||||
key_list = list(re_data.keys())
|
||||
if re_data.get("plugins"): # 1) attempt to get last 4 keys after plugins
|
||||
ind = key_list.index("plugins")
|
||||
if len(key_list) > ind+1:
|
||||
sub_keys = key_list[ind+1:]
|
||||
if len(sub_keys) == key_count:
|
||||
re_obj = {}
|
||||
missing_val = False
|
||||
for k in sub_keys:
|
||||
if not re_data.get(k):
|
||||
missing_val = True
|
||||
break
|
||||
re_obj.update({k: re_data.get(k)})
|
||||
if not missing_val:
|
||||
result = re_obj
|
||||
|
||||
if not result is None:
|
||||
return [''.join(result.values())]
|
||||
|
||||
re_keys = [] # 2) attempt scan main.js file approach to get keys
|
||||
prefix = "https://s.yimg.com/uc/finance/dd-site/js/main."
|
||||
tags = [tag['src'] for tag in soup.find_all('script') if prefix in tag.get('src', '')]
|
||||
for t in tags:
|
||||
response_js = self.cache_get(t)
|
||||
#
|
||||
if response_js.status_code != 200:
|
||||
time.sleep(random.randrange(10, 20))
|
||||
response_js.close()
|
||||
else:
|
||||
r_data = response_js.content.decode("utf8")
|
||||
re_list = [
|
||||
x.group() for x in re.finditer(r"context.dispatcher.stores=JSON.parse((?:.*?\r?\n?)*)toString", r_data)
|
||||
]
|
||||
for rl in re_list:
|
||||
re_sublist = [x.group() for x in re.finditer(r"t\[\"((?:.*?\r?\n?)*)\"\]", rl)]
|
||||
if len(re_sublist) == key_count:
|
||||
re_keys = [sl.replace('t["', '').replace('"]', '') for sl in re_sublist]
|
||||
break
|
||||
response_js.close()
|
||||
if len(re_keys) == key_count:
|
||||
break
|
||||
if len(re_keys) > 0:
|
||||
re_obj = {}
|
||||
missing_val = False
|
||||
for k in re_keys:
|
||||
if not re_data.get(k):
|
||||
missing_val = True
|
||||
break
|
||||
re_obj.update({k: re_data.get(k)})
|
||||
if not missing_val:
|
||||
return [''.join(re_obj.values())]
|
||||
|
||||
return []
|
||||
|
||||
# NOTE(review): lru_cache on a method keys on `self` and keeps the instance
# alive for as long as the cache entry exists.
@lru_cache_freezeargs
@lru_cache(maxsize=cache_maxsize)
def get_json_data_stores(self, sub_page: str = None, proxy=None) -> dict:
    '''
    get_json_data_stores returns a python dictionary of the data stores in yahoo finance web page.

    The page for ``self.ticker`` (optionally its *sub_page*) is fetched, the
    JSON assigned to ``root.App.main`` is cut out of the HTML, and its
    encrypted stores are decrypted. Decryption keys come from
    ``_get_decryption_keys_from_yahoo_js``. When that yields nothing, the
    fallback keys are the joined last four extra values of the JSON plus the
    lines of a key file downloaded from the project's GitHub repository.

    Returns an empty dict when the page holds no ``root.App.main`` assignment.
    Raises Exception when no stores can be extracted.
    '''
    if sub_page:
        ticker_url = "{}/{}/{}".format(_SCRAPE_URL_, self.ticker, sub_page)
    else:
        ticker_url = "{}/{}".format(_SCRAPE_URL_, self.ticker)

    response = self.get(url=ticker_url, proxy=proxy)
    html = response.text

    # The actual json-data for stores is in a javascript assignment in the webpage
    try:
        json_str = html.split('root.App.main =')[1].split(
            '(this)')[0].split(';\n}')[0].strip()
    except IndexError:
        # Fetch failed, probably because Yahoo spam triggered
        return {}

    data = json.loads(json_str)

    # Gather decryption keys:
    soup = BeautifulSoup(response.content, "html.parser")
    keys = self._get_decryption_keys_from_yahoo_js(soup)
    if not keys:
        msg = "No decryption keys could be extracted from JS file."
        if "requests_cache" in str(type(response)):
            msg += " Try flushing your 'requests_cache', probably parsing old JS."
        logger.warning("%s Falling back to backup decrypt methods.", msg)

        # Backup 1: the password may be the last four extra values of the JSON.
        try:
            extra_keys = _extract_extra_keys_from_stores(data)
            keys = [''.join(extra_keys[-4:])]
        except Exception as err:
            # Best effort only: carry on with the downloaded key list below.
            logger.debug("Could not build key from extra stores: %r", err)
            keys = []

        # Backup 2: known keys published in the project repository.
        keys_url = "https://github.com/ranaroussi/yfinance/raw/main/yfinance/scrapers/yahoo-keys.txt"
        response_gh = self.cache_get(keys_url)
        keys += response_gh.text.splitlines()

    # Decrypt!
    stores = decrypt_cryptojs_aes_stores(data, keys)
    if stores is None:
        # Maybe Yahoo returned old format, not encrypted
        if "context" in data and "dispatcher" in data["context"]:
            stores = data['context']['dispatcher']['stores']
    if stores is None:
        raise Exception(f"{self.ticker}: Failed to extract data stores from web request")

    # Replace empty objects with null and collapse {"raw": x, ...} wrappers
    # to the bare raw value before handing the stores back.
    new_data = json.dumps(stores).replace('{}', 'null')
    new_data = re.sub(
        r'{[\'|\"]raw[\'|\"]:(.*?),(.*?)}', r'\1', new_data)

    return json.loads(new_data)
|
||||
|
||||
@@ -4,3 +4,9 @@ class YFinanceException(Exception):
|
||||
|
||||
class YFinanceDataException(YFinanceException):
|
||||
pass
|
||||
|
||||
|
||||
class YFNotImplementedError(NotImplementedError):
    """Raised for data whose fetching from the Yahoo API is not implemented."""

    def __init__(self, method_name):
        # Keep the name so handlers can tell which accessor was requested.
        self.method_name = method_name
        super().__init__(f"Have not implemented fetching '{method_name}' from Yahoo API")
|
||||
|
||||
|
||||
@@ -30,10 +30,11 @@ import pandas as _pd
|
||||
from . import Ticker, utils
|
||||
from . import shared
|
||||
|
||||
@utils.log_indent_decorator
|
||||
def download(tickers, start=None, end=None, actions=False, threads=True, ignore_tz=None,
|
||||
group_by='column', auto_adjust=False, back_adjust=False, repair=False, keepna=False,
|
||||
progress=True, period="max", show_errors=None, interval="1d", prepost=False,
|
||||
proxy=None, rounding=False, timeout=10):
|
||||
proxy=None, rounding=False, timeout=10, session=None):
|
||||
"""Download yahoo tickers
|
||||
:Parameters:
|
||||
tickers : str, list
|
||||
@@ -82,15 +83,29 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
|
||||
timeout: None or float
|
||||
If not None stops waiting for a response after given number of
|
||||
seconds. (Can also be a fraction of a second e.g. 0.01)
|
||||
session: None or Session
|
||||
Optional. Pass your own session object to be used for all requests
|
||||
"""
|
||||
logger = utils.get_yf_logger()
|
||||
|
||||
if show_errors is not None:
|
||||
if show_errors:
|
||||
utils.print_once(f"yfinance: download(show_errors={show_errors}) argument is deprecated and will be removed in future version. Do this instead: logging.getLogger('yfinance').setLevel(logging.ERROR)")
|
||||
logging.getLogger('yfinance').setLevel(logging.ERROR)
|
||||
logger.setLevel(logging.ERROR)
|
||||
else:
|
||||
utils.print_once(f"yfinance: download(show_errors={show_errors}) argument is deprecated and will be removed in future version. Do this instead to suppress error messages: logging.getLogger('yfinance').setLevel(logging.CRITICAL)")
|
||||
logging.getLogger('yfinance').setLevel(logging.CRITICAL)
|
||||
logger.setLevel(logging.CRITICAL)
|
||||
|
||||
if logger.isEnabledFor(logging.DEBUG):
|
||||
if threads:
|
||||
# With DEBUG, each thread generates a lot of log messages.
|
||||
# And with multi-threading, these messages will be interleaved, bad!
|
||||
# So disable multi-threading to make log readable.
|
||||
logger.debug('Disabling multithreading because DEBUG logging enabled')
|
||||
threads = False
|
||||
if progress:
|
||||
# Disable progress bar, interferes with display of log messages
|
||||
progress = False
|
||||
|
||||
if ignore_tz is None:
|
||||
# Set default value depending on interval
|
||||
@@ -110,7 +125,7 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
|
||||
for ticker in tickers:
|
||||
if utils.is_isin(ticker):
|
||||
isin = ticker
|
||||
ticker = utils.get_ticker_by_isin(ticker, proxy)
|
||||
ticker = utils.get_ticker_by_isin(ticker, proxy, session=session)
|
||||
shared._ISINS[ticker] = isin
|
||||
_tickers_.append(ticker)
|
||||
|
||||
@@ -137,10 +152,9 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
|
||||
actions=actions, auto_adjust=auto_adjust,
|
||||
back_adjust=back_adjust, repair=repair, keepna=keepna,
|
||||
progress=(progress and i > 0), proxy=proxy,
|
||||
rounding=rounding, timeout=timeout)
|
||||
rounding=rounding, timeout=timeout, session=session)
|
||||
while len(shared._DFS) < len(tickers):
|
||||
_time.sleep(0.01)
|
||||
|
||||
# download synchronously
|
||||
else:
|
||||
for i, ticker in enumerate(tickers):
|
||||
@@ -149,10 +163,10 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
|
||||
actions=actions, auto_adjust=auto_adjust,
|
||||
back_adjust=back_adjust, repair=repair, keepna=keepna,
|
||||
proxy=proxy,
|
||||
rounding=rounding, timeout=timeout)
|
||||
rounding=rounding, timeout=timeout, session=session)
|
||||
if progress:
|
||||
shared._PROGRESS_BAR.animate()
|
||||
|
||||
|
||||
if progress:
|
||||
shared._PROGRESS_BAR.completed()
|
||||
|
||||
@@ -166,6 +180,7 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
|
||||
errors = {}
|
||||
for ticker in shared._ERRORS:
|
||||
err = shared._ERRORS[ticker]
|
||||
err = err.replace(f'{ticker}', '%ticker%')
|
||||
if not err in errors:
|
||||
errors[err] = [ticker]
|
||||
else:
|
||||
@@ -177,6 +192,7 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
|
||||
tbs = {}
|
||||
for ticker in shared._TRACEBACKS:
|
||||
tb = shared._TRACEBACKS[ticker]
|
||||
tb = tb.replace(f'{ticker}', '%ticker%')
|
||||
if not tb in tbs:
|
||||
tbs[tb] = [ticker]
|
||||
else:
|
||||
@@ -239,10 +255,10 @@ def _download_one_threaded(ticker, start=None, end=None,
|
||||
auto_adjust=False, back_adjust=False, repair=False,
|
||||
actions=False, progress=True, period="max",
|
||||
interval="1d", prepost=False, proxy=None,
|
||||
keepna=False, rounding=False, timeout=10):
|
||||
keepna=False, rounding=False, timeout=10, session=None):
|
||||
data = _download_one(ticker, start, end, auto_adjust, back_adjust, repair,
|
||||
actions, period, interval, prepost, proxy, rounding,
|
||||
keepna, timeout)
|
||||
keepna, timeout, session)
|
||||
if progress:
|
||||
shared._PROGRESS_BAR.animate()
|
||||
|
||||
@@ -251,10 +267,10 @@ def _download_one(ticker, start=None, end=None,
|
||||
auto_adjust=False, back_adjust=False, repair=False,
|
||||
actions=False, period="max", interval="1d",
|
||||
prepost=False, proxy=None, rounding=False,
|
||||
keepna=False, timeout=10):
|
||||
keepna=False, timeout=10, session=None):
|
||||
data = None
|
||||
try:
|
||||
data = Ticker(ticker).history(
|
||||
data = Ticker(ticker, session=session).history(
|
||||
period=period, interval=interval,
|
||||
start=start, end=end, prepost=prepost,
|
||||
actions=actions, auto_adjust=auto_adjust,
|
||||
|
||||
@@ -2,6 +2,7 @@ import pandas as pd
|
||||
|
||||
from yfinance import utils
|
||||
from yfinance.data import TickerData
|
||||
from yfinance.exceptions import YFNotImplementedError
|
||||
|
||||
|
||||
class Analysis:
|
||||
@@ -20,99 +21,29 @@ class Analysis:
|
||||
@property
|
||||
def earnings_trend(self) -> pd.DataFrame:
|
||||
if self._earnings_trend is None:
|
||||
self._scrape(self.proxy)
|
||||
raise YFNotImplementedError('earnings_trend')
|
||||
return self._earnings_trend
|
||||
|
||||
@property
|
||||
def analyst_trend_details(self) -> pd.DataFrame:
|
||||
if self._analyst_trend_details is None:
|
||||
self._scrape(self.proxy)
|
||||
raise YFNotImplementedError('analyst_trend_details')
|
||||
return self._analyst_trend_details
|
||||
|
||||
@property
|
||||
def analyst_price_target(self) -> pd.DataFrame:
|
||||
if self._analyst_price_target is None:
|
||||
self._scrape(self.proxy)
|
||||
raise YFNotImplementedError('analyst_price_target')
|
||||
return self._analyst_price_target
|
||||
|
||||
@property
|
||||
def rev_est(self) -> pd.DataFrame:
|
||||
if self._rev_est is None:
|
||||
self._scrape(self.proxy)
|
||||
raise YFNotImplementedError('rev_est')
|
||||
return self._rev_est
|
||||
|
||||
@property
|
||||
def eps_est(self) -> pd.DataFrame:
|
||||
if self._eps_est is None:
|
||||
self._scrape(self.proxy)
|
||||
raise YFNotImplementedError('eps_est')
|
||||
return self._eps_est
|
||||
|
||||
def _scrape(self, proxy):
|
||||
if self._already_scraped:
|
||||
return
|
||||
self._already_scraped = True
|
||||
|
||||
# Analysis Data/Analyst Forecasts
|
||||
analysis_data = self._data.get_json_data_stores("analysis", proxy=proxy)
|
||||
try:
|
||||
analysis_data = analysis_data['QuoteSummaryStore']
|
||||
except KeyError as e:
|
||||
err_msg = "No analysis data found, symbol may be delisted"
|
||||
logger.error('%s: %s', self._data.ticker, err_msg)
|
||||
return
|
||||
|
||||
if isinstance(analysis_data.get('earningsTrend'), dict):
|
||||
try:
|
||||
analysis = pd.DataFrame(analysis_data['earningsTrend']['trend'])
|
||||
analysis['endDate'] = pd.to_datetime(analysis['endDate'])
|
||||
analysis.set_index('period', inplace=True)
|
||||
analysis.index = analysis.index.str.upper()
|
||||
analysis.index.name = 'Period'
|
||||
analysis.columns = utils.camel2title(analysis.columns)
|
||||
|
||||
dict_cols = []
|
||||
|
||||
for idx, row in analysis.iterrows():
|
||||
for colname, colval in row.items():
|
||||
if isinstance(colval, dict):
|
||||
dict_cols.append(colname)
|
||||
for k, v in colval.items():
|
||||
new_colname = colname + ' ' + \
|
||||
utils.camel2title([k])[0]
|
||||
analysis.loc[idx, new_colname] = v
|
||||
|
||||
self._earnings_trend = analysis[[
|
||||
c for c in analysis.columns if c not in dict_cols]]
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
self._analyst_trend_details = pd.DataFrame(analysis_data['recommendationTrend']['trend'])
|
||||
except Exception as e:
|
||||
self._analyst_trend_details = None
|
||||
try:
|
||||
self._analyst_price_target = pd.DataFrame(analysis_data['financialData'], index=[0])[
|
||||
['targetLowPrice', 'currentPrice', 'targetMeanPrice', 'targetHighPrice', 'numberOfAnalystOpinions']].T
|
||||
except Exception as e:
|
||||
self._analyst_price_target = None
|
||||
earnings_estimate = []
|
||||
revenue_estimate = []
|
||||
if self._analyst_trend_details is not None :
|
||||
for key in analysis_data['earningsTrend']['trend']:
|
||||
try:
|
||||
earnings_dict = key['earningsEstimate']
|
||||
earnings_dict['period'] = key['period']
|
||||
earnings_dict['endDate'] = key['endDate']
|
||||
earnings_estimate.append(earnings_dict)
|
||||
|
||||
revenue_dict = key['revenueEstimate']
|
||||
revenue_dict['period'] = key['period']
|
||||
revenue_dict['endDate'] = key['endDate']
|
||||
revenue_estimate.append(revenue_dict)
|
||||
except Exception as e:
|
||||
pass
|
||||
self._rev_est = pd.DataFrame(revenue_estimate)
|
||||
self._eps_est = pd.DataFrame(earnings_estimate)
|
||||
else:
|
||||
self._rev_est = pd.DataFrame()
|
||||
self._eps_est = pd.DataFrame()
|
||||
|
||||
@@ -5,11 +5,9 @@ import json
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
from yfinance import utils
|
||||
from yfinance import utils, const
|
||||
from yfinance.data import TickerData
|
||||
from yfinance.exceptions import YFinanceDataException, YFinanceException
|
||||
|
||||
logger = utils.get_yf_logger()
|
||||
from yfinance.exceptions import YFinanceException, YFNotImplementedError
|
||||
|
||||
class Fundamentals:
|
||||
|
||||
@@ -33,71 +31,15 @@ class Fundamentals:
|
||||
@property
|
||||
def earnings(self) -> dict:
|
||||
if self._earnings is None:
|
||||
self._scrape_earnings(self.proxy)
|
||||
raise YFNotImplementedError('earnings')
|
||||
return self._earnings
|
||||
|
||||
@property
|
||||
def shares(self) -> pd.DataFrame:
|
||||
if self._shares is None:
|
||||
self._scrape_shares(self.proxy)
|
||||
raise YFNotImplementedError('shares')
|
||||
return self._shares
|
||||
|
||||
def _scrape_basics(self, proxy):
|
||||
if self._basics_already_scraped:
|
||||
return
|
||||
self._basics_already_scraped = True
|
||||
|
||||
self._financials_data = self._data.get_json_data_stores('financials', proxy)
|
||||
try:
|
||||
self._fin_data_quote = self._financials_data['QuoteSummaryStore']
|
||||
except KeyError:
|
||||
err_msg = "No financials data found, symbol may be delisted"
|
||||
logger.error('%s: %s', self._data.ticker, err_msg)
|
||||
return None
|
||||
|
||||
def _scrape_earnings(self, proxy):
|
||||
self._scrape_basics(proxy)
|
||||
# earnings
|
||||
self._earnings = {"yearly": pd.DataFrame(), "quarterly": pd.DataFrame()}
|
||||
if self._fin_data_quote is None:
|
||||
return
|
||||
if isinstance(self._fin_data_quote.get('earnings'), dict):
|
||||
try:
|
||||
earnings = self._fin_data_quote['earnings']['financialsChart']
|
||||
earnings['financialCurrency'] = self._fin_data_quote['earnings'].get('financialCurrency', 'USD')
|
||||
self._earnings['financialCurrency'] = earnings['financialCurrency']
|
||||
df = pd.DataFrame(earnings['yearly']).set_index('date')
|
||||
df.columns = utils.camel2title(df.columns)
|
||||
df.index.name = 'Year'
|
||||
self._earnings['yearly'] = df
|
||||
|
||||
df = pd.DataFrame(earnings['quarterly']).set_index('date')
|
||||
df.columns = utils.camel2title(df.columns)
|
||||
df.index.name = 'Quarter'
|
||||
self._earnings['quarterly'] = df
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _scrape_shares(self, proxy):
|
||||
self._scrape_basics(proxy)
|
||||
# shares outstanding
|
||||
try:
|
||||
# keep only years with non None data
|
||||
available_shares = [shares_data for shares_data in
|
||||
self._financials_data['QuoteTimeSeriesStore']['timeSeries']['annualBasicAverageShares']
|
||||
if
|
||||
shares_data]
|
||||
shares = pd.DataFrame(available_shares)
|
||||
shares['Year'] = shares['asOfDate'].agg(lambda x: int(x[:4]))
|
||||
shares.set_index('Year', inplace=True)
|
||||
shares.drop(columns=['dataId', 'asOfDate',
|
||||
'periodType', 'currencyCode'], inplace=True)
|
||||
shares.rename(
|
||||
columns={'reportedValue': "BasicShares"}, inplace=True)
|
||||
self._shares = shares
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
class Financials:
|
||||
def __init__(self, data: TickerData):
|
||||
@@ -105,9 +47,6 @@ class Financials:
|
||||
self._income_time_series = {}
|
||||
self._balance_sheet_time_series = {}
|
||||
self._cash_flow_time_series = {}
|
||||
self._income_scraped = {}
|
||||
self._balance_sheet_scraped = {}
|
||||
self._cash_flow_scraped = {}
|
||||
|
||||
def get_income_time_series(self, freq="yearly", proxy=None) -> pd.DataFrame:
|
||||
res = self._income_time_series
|
||||
@@ -127,6 +66,7 @@ class Financials:
|
||||
res[freq] = self._fetch_time_series("cash-flow", freq, proxy=None)
|
||||
return res[freq]
|
||||
|
||||
@utils.log_indent_decorator
|
||||
def _fetch_time_series(self, name, timescale, proxy=None):
|
||||
# Fetching time series preferred over scraping 'QuoteSummaryStore',
|
||||
# because it matches what Yahoo shows. But for some tickers returns nothing,
|
||||
@@ -146,7 +86,7 @@ class Financials:
|
||||
if statement is not None:
|
||||
return statement
|
||||
except YFinanceException as e:
|
||||
logger.error("%s: Failed to create %s financials table for reason: %r", self._data.ticker, name, e)
|
||||
utils.get_yf_logger().error("%s: Failed to create %s financials table for reason: %r", self._data.ticker, name, e)
|
||||
return pd.DataFrame()
|
||||
|
||||
def _create_financials_table(self, name, timescale, proxy):
|
||||
@@ -154,37 +94,13 @@ class Financials:
|
||||
# Yahoo stores the 'income' table internally under 'financials' key
|
||||
name = "financials"
|
||||
|
||||
keys = self._get_datastore_keys(name, proxy)
|
||||
keys = const.fundamentals_keys[name]
|
||||
|
||||
try:
|
||||
return self.get_financials_time_series(timescale, keys, proxy)
|
||||
except Exception as e:
|
||||
pass
|
||||
|
||||
def _get_datastore_keys(self, sub_page, proxy) -> list:
|
||||
data_stores = self._data.get_json_data_stores(sub_page, proxy)
|
||||
|
||||
# Step 1: get the keys:
|
||||
def _finditem1(key, obj):
|
||||
values = []
|
||||
if isinstance(obj, dict):
|
||||
if key in obj.keys():
|
||||
values.append(obj[key])
|
||||
for k, v in obj.items():
|
||||
values += _finditem1(key, v)
|
||||
elif isinstance(obj, list):
|
||||
for v in obj:
|
||||
values += _finditem1(key, v)
|
||||
return values
|
||||
|
||||
try:
|
||||
keys = _finditem1("key", data_stores['FinancialTemplateStore'])
|
||||
except KeyError as e:
|
||||
raise YFinanceDataException("Parsing FinancialTemplateStore failed, reason: {}".format(repr(e)))
|
||||
|
||||
if not keys:
|
||||
raise YFinanceDataException("No keys in FinancialTemplateStore")
|
||||
return keys
|
||||
|
||||
def get_financials_time_series(self, timescale, keys: list, proxy=None) -> pd.DataFrame:
|
||||
timescale_translation = {"yearly": "annual", "quarterly": "quarterly"}
|
||||
timescale = timescale_translation[timescale]
|
||||
@@ -233,89 +149,3 @@ class Financials:
|
||||
df = df[sorted(df.columns, reverse=True)]
|
||||
|
||||
return df
|
||||
|
||||
def get_income_scrape(self, freq="yearly", proxy=None) -> pd.DataFrame:
    """Return the scraped income statement for *freq*, scraping it on first use.

    Results are cached per frequency in ``self._income_scraped``. *proxy* is
    forwarded to ``_scrape`` instead of being dropped.
    """
    res = self._income_scraped
    if freq not in res:
        res[freq] = self._scrape("income", freq, proxy=proxy)
    return res[freq]
|
||||
|
||||
def get_balance_sheet_scrape(self, freq="yearly", proxy=None) -> pd.DataFrame:
    """Return the scraped balance sheet for *freq*, scraping it on first use.

    Results are cached per frequency in ``self._balance_sheet_scraped``.
    *proxy* is forwarded to ``_scrape`` instead of being dropped.
    """
    res = self._balance_sheet_scraped
    if freq not in res:
        res[freq] = self._scrape("balance-sheet", freq, proxy=proxy)
    return res[freq]
|
||||
|
||||
def get_cash_flow_scrape(self, freq="yearly", proxy=None) -> pd.DataFrame:
    """Return the scraped cash-flow statement for *freq*, scraping it on first use.

    Results are cached per frequency in ``self._cash_flow_scraped``. *proxy*
    is forwarded to ``_scrape`` instead of being dropped.
    """
    res = self._cash_flow_scraped
    if freq not in res:
        res[freq] = self._scrape("cash-flow", freq, proxy=proxy)
    return res[freq]
|
||||
|
||||
def _scrape(self, name, timescale, proxy=None):
|
||||
# Backup in case _fetch_time_series() fails to return data
|
||||
|
||||
allowed_names = ["income", "balance-sheet", "cash-flow"]
|
||||
allowed_timescales = ["yearly", "quarterly"]
|
||||
|
||||
if name not in allowed_names:
|
||||
raise ValueError("Illegal argument: name must be one of: {}".format(allowed_names))
|
||||
if timescale not in allowed_timescales:
|
||||
raise ValueError("Illegal argument: timescale must be one of: {}".format(allowed_names))
|
||||
|
||||
try:
|
||||
statement = self._create_financials_table_old(name, timescale, proxy)
|
||||
|
||||
if statement is not None:
|
||||
return statement
|
||||
except YFinanceException as e:
|
||||
logger.error("%s: Failed to create financials table for %s reason: %r", self._data.ticker, name, e)
|
||||
return pd.DataFrame()
|
||||
|
||||
def _create_financials_table_old(self, name, timescale, proxy):
|
||||
data_stores = self._data.get_json_data_stores("financials", proxy)
|
||||
|
||||
# Fetch raw data
|
||||
if not "QuoteSummaryStore" in data_stores:
|
||||
raise YFinanceDataException(f"Yahoo not returning legacy financials data")
|
||||
data = data_stores["QuoteSummaryStore"]
|
||||
|
||||
if name == "cash-flow":
|
||||
key1 = "cashflowStatement"
|
||||
key2 = "cashflowStatements"
|
||||
elif name == "balance-sheet":
|
||||
key1 = "balanceSheet"
|
||||
key2 = "balanceSheetStatements"
|
||||
else:
|
||||
key1 = "incomeStatement"
|
||||
key2 = "incomeStatementHistory"
|
||||
key1 += "History"
|
||||
if timescale == "quarterly":
|
||||
key1 += "Quarterly"
|
||||
if key1 not in data or data[key1] is None or key2 not in data[key1]:
|
||||
raise YFinanceDataException(f"Yahoo not returning legacy {name} financials data")
|
||||
data = data[key1][key2]
|
||||
|
||||
# Tabulate
|
||||
df = pd.DataFrame(data)
|
||||
if len(df) == 0:
|
||||
raise YFinanceDataException(f"Yahoo not returning legacy {name} financials data")
|
||||
df = df.drop(columns=['maxAge'])
|
||||
for col in df.columns:
|
||||
df[col] = df[col].replace('-', np.nan)
|
||||
df.set_index('endDate', inplace=True)
|
||||
try:
|
||||
df.index = pd.to_datetime(df.index, unit='s')
|
||||
except ValueError:
|
||||
df.index = pd.to_datetime(df.index)
|
||||
df = df.T
|
||||
df.columns.name = ''
|
||||
df.index.name = 'Breakdown'
|
||||
# rename incorrect yahoo key
|
||||
df.rename(index={'treasuryStock': 'gainsLossesNotAffectingRetainedEarnings'}, inplace=True)
|
||||
|
||||
# Upper-case first letter, leave rest unchanged:
|
||||
s0 = df.index[0]
|
||||
df.index = [s[0].upper()+s[1:] for s in df.index]
|
||||
|
||||
return df
|
||||
|
||||
@@ -8,8 +8,7 @@ import numpy as _np
|
||||
|
||||
from yfinance import utils
|
||||
from yfinance.data import TickerData
|
||||
|
||||
logger = utils.get_yf_logger()
|
||||
from yfinance.exceptions import YFNotImplementedError
|
||||
|
||||
info_retired_keys_price = {"currentPrice", "dayHigh", "dayLow", "open", "previousClose", "volume", "volume24Hr"}
|
||||
info_retired_keys_price.update({"regularMarket"+s for s in ["DayHigh", "DayLow", "Open", "PreviousClose", "Price", "Volume"]})
|
||||
@@ -21,10 +20,7 @@ info_retired_keys_symbol = {"symbol"}
|
||||
info_retired_keys = info_retired_keys_price | info_retired_keys_exchange | info_retired_keys_marketCap | info_retired_keys_symbol
|
||||
|
||||
|
||||
PRUNE_INFO = True
|
||||
# PRUNE_INFO = False
|
||||
_BASIC_URL_ = "https://query2.finance.yahoo.com/v10/finance/quoteSummary"
|
||||
|
||||
_BASIC_URL_ = "https://query2.finance.yahoo.com/v6/finance/quoteSummary"
|
||||
|
||||
from collections.abc import MutableMapping
|
||||
class InfoDictWrapper(MutableMapping):
|
||||
@@ -82,8 +78,6 @@ class FastInfo:
|
||||
# Contain small subset of info[] items that can be fetched faster elsewhere.
|
||||
# Imitates a dict.
|
||||
def __init__(self, tickerBaseObject):
|
||||
utils.print_once("yfinance: Note: 'Ticker.info' dict is now fixed & improved, 'fast_info' is no longer faster")
|
||||
|
||||
self._tkr = tickerBaseObject
|
||||
|
||||
self._prices_1y = None
|
||||
@@ -179,10 +173,9 @@ class FastInfo:
|
||||
def _get_1y_prices(self, fullDaysOnly=False):
|
||||
if self._prices_1y is None:
|
||||
# Temporarily disable error printing
|
||||
l = logger.level
|
||||
logger.setLevel(logging.CRITICAL)
|
||||
logging.disable(logging.CRITICAL)
|
||||
self._prices_1y = self._tkr.history(period="380d", auto_adjust=False, keepna=True)
|
||||
logger.setLevel(l)
|
||||
logging.disable(logging.NOTSET)
|
||||
self._md = self._tkr.get_history_metadata()
|
||||
try:
|
||||
ctp = self._md["currentTradingPeriod"]
|
||||
@@ -209,19 +202,17 @@ class FastInfo:
|
||||
def _get_1wk_1h_prepost_prices(self):
|
||||
if self._prices_1wk_1h_prepost is None:
|
||||
# Temporarily disable error printing
|
||||
l = logger.level
|
||||
logger.setLevel(logging.CRITICAL)
|
||||
logging.disable(logging.CRITICAL)
|
||||
self._prices_1wk_1h_prepost = self._tkr.history(period="1wk", interval="1h", auto_adjust=False, prepost=True)
|
||||
logger.setLevel(l)
|
||||
logging.disable(logging.NOTSET)
|
||||
return self._prices_1wk_1h_prepost
|
||||
|
||||
def _get_1wk_1h_reg_prices(self):
|
||||
if self._prices_1wk_1h_reg is None:
|
||||
# Temporarily disable error printing
|
||||
l = logger.level
|
||||
logger.setLevel(logging.CRITICAL)
|
||||
logging.disable(logging.CRITICAL)
|
||||
self._prices_1wk_1h_reg = self._tkr.history(period="1wk", interval="1h", auto_adjust=False, prepost=False)
|
||||
logger.setLevel(l)
|
||||
logging.disable(logging.NOTSET)
|
||||
return self._prices_1wk_1h_reg
|
||||
|
||||
def _get_exchange_metadata(self):
|
||||
@@ -299,9 +290,9 @@ class FastInfo:
|
||||
return self._shares
|
||||
|
||||
shares = self._tkr.get_shares_full(start=pd.Timestamp.utcnow().date()-pd.Timedelta(days=548))
|
||||
if shares is None:
|
||||
# Requesting 18 months failed, so fallback to shares which should include last year
|
||||
shares = self._tkr.get_shares()
|
||||
# if shares is None:
|
||||
# # Requesting 18 months failed, so fallback to shares which should include last year
|
||||
# shares = self._tkr.get_shares()
|
||||
if shares is not None:
|
||||
if isinstance(shares, pd.DataFrame):
|
||||
shares = shares[shares.columns[0]]
|
||||
@@ -568,9 +559,7 @@ class Quote:
|
||||
@property
|
||||
def info(self) -> dict:
|
||||
if self._info is None:
|
||||
# self._scrape(self.proxy) # decrypt broken
|
||||
self._fetch(self.proxy)
|
||||
|
||||
self._fetch_complementary(self.proxy)
|
||||
|
||||
return self._info
|
||||
@@ -578,150 +567,31 @@ class Quote:
|
||||
@property
|
||||
def sustainability(self) -> pd.DataFrame:
|
||||
if self._sustainability is None:
|
||||
self._scrape(self.proxy)
|
||||
raise YFNotImplementedError('sustainability')
|
||||
return self._sustainability
|
||||
|
||||
@property
|
||||
def recommendations(self) -> pd.DataFrame:
|
||||
if self._recommendations is None:
|
||||
self._scrape(self.proxy)
|
||||
raise YFNotImplementedError('recommendations')
|
||||
return self._recommendations
|
||||
|
||||
@property
|
||||
def calendar(self) -> pd.DataFrame:
|
||||
if self._calendar is None:
|
||||
self._scrape(self.proxy)
|
||||
raise YFNotImplementedError('calendar')
|
||||
return self._calendar
|
||||
|
||||
def _scrape(self, proxy):
|
||||
if self._already_scraped:
|
||||
return
|
||||
self._already_scraped = True
|
||||
|
||||
# get info and sustainability
|
||||
json_data = self._data.get_json_data_stores(proxy=proxy)
|
||||
try:
|
||||
quote_summary_store = json_data['QuoteSummaryStore']
|
||||
except KeyError:
|
||||
err_msg = "No summary info found, symbol may be delisted"
|
||||
logger.error('%s: %s', self._data.ticker, err_msg)
|
||||
return None
|
||||
|
||||
# sustainability
|
||||
d = {}
|
||||
try:
|
||||
if isinstance(quote_summary_store.get('esgScores'), dict):
|
||||
for item in quote_summary_store['esgScores']:
|
||||
if not isinstance(quote_summary_store['esgScores'][item], (dict, list)):
|
||||
d[item] = quote_summary_store['esgScores'][item]
|
||||
|
||||
s = pd.DataFrame(index=[0], data=d)[-1:].T
|
||||
s.columns = ['Value']
|
||||
s.index.name = '%.f-%.f' % (
|
||||
s[s.index == 'ratingYear']['Value'].values[0],
|
||||
s[s.index == 'ratingMonth']['Value'].values[0])
|
||||
|
||||
self._sustainability = s[~s.index.isin(
|
||||
['maxAge', 'ratingYear', 'ratingMonth'])]
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
self._info = {}
|
||||
try:
|
||||
items = ['summaryProfile', 'financialData', 'quoteType',
|
||||
'defaultKeyStatistics', 'assetProfile', 'summaryDetail']
|
||||
for item in items:
|
||||
if isinstance(quote_summary_store.get(item), dict):
|
||||
self._info.update(quote_summary_store[item])
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# For ETFs, provide this valuable data: the top holdings of the ETF
|
||||
try:
|
||||
if 'topHoldings' in quote_summary_store:
|
||||
self._info.update(quote_summary_store['topHoldings'])
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
if not isinstance(quote_summary_store.get('summaryDetail'), dict):
|
||||
# For some reason summaryDetail did not give any results. The price dict
|
||||
# usually has most of the same info
|
||||
self._info.update(quote_summary_store.get('price', {}))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
# self._info['regularMarketPrice'] = self._info['regularMarketOpen']
|
||||
self._info['regularMarketPrice'] = quote_summary_store.get('price', {}).get(
|
||||
'regularMarketPrice', self._info.get('regularMarketOpen', None))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
self._info['preMarketPrice'] = quote_summary_store.get('price', {}).get(
|
||||
'preMarketPrice', self._info.get('preMarketPrice', None))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
self._info['logo_url'] = ""
|
||||
try:
|
||||
if not 'website' in self._info:
|
||||
self._info['logo_url'] = 'https://logo.clearbit.com/%s.com' % \
|
||||
self._info['shortName'].split(' ')[0].split(',')[0]
|
||||
else:
|
||||
domain = self._info['website'].split(
|
||||
'://')[1].split('/')[0].replace('www.', '')
|
||||
self._info['logo_url'] = 'https://logo.clearbit.com/%s' % domain
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Delete redundant info[] keys, because values can be accessed faster
|
||||
# elsewhere - e.g. price keys. Hope is reduces Yahoo spam effect.
|
||||
# But record the dropped keys, because in rare cases they are needed.
|
||||
self._retired_info = {}
|
||||
for k in info_retired_keys:
|
||||
if k in self._info:
|
||||
self._retired_info[k] = self._info[k]
|
||||
if PRUNE_INFO:
|
||||
del self._info[k]
|
||||
if PRUNE_INFO:
|
||||
# InfoDictWrapper will explain how to access above data elsewhere
|
||||
self._info = InfoDictWrapper(self._info)
|
||||
|
||||
# events
|
||||
try:
|
||||
cal = pd.DataFrame(quote_summary_store['calendarEvents']['earnings'])
|
||||
cal['earningsDate'] = pd.to_datetime(
|
||||
cal['earningsDate'], unit='s')
|
||||
self._calendar = cal.T
|
||||
self._calendar.index = utils.camel2title(self._calendar.index)
|
||||
self._calendar.columns = ['Value']
|
||||
except Exception as e:
|
||||
pass
|
||||
|
||||
# analyst recommendations
|
||||
try:
|
||||
rec = pd.DataFrame(
|
||||
quote_summary_store['upgradeDowngradeHistory']['history'])
|
||||
rec['earningsDate'] = pd.to_datetime(
|
||||
rec['epochGradeDate'], unit='s')
|
||||
rec.set_index('earningsDate', inplace=True)
|
||||
rec.index.name = 'Date'
|
||||
rec.columns = utils.camel2title(rec.columns)
|
||||
self._recommendations = rec[[
|
||||
'Firm', 'To Grade', 'From Grade', 'Action']].sort_index()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _fetch(self, proxy):
|
||||
if self._already_fetched:
|
||||
return
|
||||
self._already_fetched = True
|
||||
modules = ['summaryProfile', 'financialData', 'quoteType',
|
||||
'defaultKeyStatistics', 'assetProfile', 'summaryDetail']
|
||||
modules = ['financialData', 'quoteType', 'defaultKeyStatistics', 'assetProfile', 'summaryDetail']
|
||||
params_dict = {}
|
||||
params_dict["modules"] = modules
|
||||
params_dict["ssl"] = "true"
|
||||
result = self._data.get_raw_json(
|
||||
_BASIC_URL_ + f"/{self._data.ticker}", params={"modules": ",".join(modules), "ssl": "true"}, proxy=proxy
|
||||
_BASIC_URL_ + f"/{self._data.ticker}", params=params_dict, proxy=proxy
|
||||
)
|
||||
result["quoteSummary"]["result"][0]["symbol"] = self._data.ticker
|
||||
query1_info = next(
|
||||
|
||||
@@ -235,6 +235,10 @@ class Ticker(TickerBase):
|
||||
def news(self):
|
||||
return self.get_news()
|
||||
|
||||
@property
|
||||
def trend_details(self) -> _pd.DataFrame:
|
||||
return self.get_trend_details()
|
||||
|
||||
@property
|
||||
def earnings_trend(self) -> _pd.DataFrame:
|
||||
return self.get_earnings_trend()
|
||||
|
||||
@@ -70,19 +70,107 @@ def print_once(msg):
|
||||
print(msg)
|
||||
|
||||
|
||||
## Logging
|
||||
# Note: most of this logic is adding indentation with function depth,
|
||||
# so that DEBUG log is readable.
|
||||
class IndentLoggerAdapter(logging.LoggerAdapter):
|
||||
def process(self, msg, kwargs):
|
||||
if get_yf_logger().isEnabledFor(logging.DEBUG):
|
||||
i = ' ' * self.extra['indent']
|
||||
if not isinstance(msg, str):
|
||||
msg = str(msg)
|
||||
msg = '\n'.join([i + m for m in msg.split('\n')])
|
||||
return msg, kwargs
|
||||
|
||||
import threading
|
||||
_indentation_level = threading.local()
|
||||
class IndentationContext:
|
||||
def __init__(self, increment=1):
|
||||
self.increment = increment
|
||||
def __enter__(self):
|
||||
_indentation_level.indent = getattr(_indentation_level, 'indent', 0) + self.increment
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
_indentation_level.indent -= self.increment
|
||||
|
||||
def get_indented_logger(name=None):
|
||||
# Never cache the returned value! Will break indentation.
|
||||
return IndentLoggerAdapter(logging.getLogger(name), {'indent': getattr(_indentation_level, 'indent', 0)})
|
||||
|
||||
def log_indent_decorator(func):
|
||||
def wrapper(*args, **kwargs):
|
||||
logger = get_indented_logger('yfinance')
|
||||
logger.debug(f'Entering {func.__name__}()')
|
||||
|
||||
with IndentationContext():
|
||||
result = func(*args, **kwargs)
|
||||
|
||||
logger.debug(f'Exiting {func.__name__}()')
|
||||
return result
|
||||
|
||||
return wrapper
|
||||
|
||||
class MultiLineFormatter(logging.Formatter):
|
||||
# The 'fmt' formatting further down is only applied to first line
|
||||
# of log message, specifically the padding after %level%.
|
||||
# For multi-line messages, need to manually copy over padding.
|
||||
def __init__(self, fmt):
|
||||
super().__init__(fmt)
|
||||
# Extract amount of padding
|
||||
match = _re.search(r'%\(levelname\)-(\d+)s', fmt)
|
||||
self.level_length = int(match.group(1)) if match else 0
|
||||
|
||||
def format(self, record):
|
||||
original = super().format(record)
|
||||
lines = original.split('\n')
|
||||
levelname = lines[0].split(' ')[0]
|
||||
if len(lines) <= 1:
|
||||
return original
|
||||
else:
|
||||
# Apply padding to all lines below first
|
||||
formatted = [lines[0]]
|
||||
if self.level_length == 0:
|
||||
padding = ' ' * len(levelname)
|
||||
else:
|
||||
padding = ' ' * self.level_length
|
||||
padding += ' ' # +1 for space between level and message
|
||||
formatted.extend(padding + line for line in lines[1:])
|
||||
return '\n'.join(formatted)
|
||||
|
||||
yf_logger = None
|
||||
yf_log_indented = False
|
||||
def get_yf_logger():
|
||||
global yf_logger
|
||||
if yf_logger is None:
|
||||
yf_logger = logging.getLogger("yfinance")
|
||||
if yf_logger.handlers is None or len(yf_logger.handlers) == 0:
|
||||
# Add stream handler if user not already added one
|
||||
h = logging.StreamHandler()
|
||||
formatter = logging.Formatter(fmt='%(levelname)s %(message)s')
|
||||
h.setFormatter(formatter)
|
||||
yf_logger.addHandler(h)
|
||||
yf_logger = logging.getLogger('yfinance')
|
||||
global yf_log_indented
|
||||
if yf_log_indented:
|
||||
yf_logger = get_indented_logger('yfinance')
|
||||
return yf_logger
|
||||
|
||||
def setup_debug_formatting():
|
||||
global yf_logger
|
||||
yf_logger = get_yf_logger()
|
||||
|
||||
if not yf_logger.isEnabledFor(logging.DEBUG):
|
||||
yf_logger.warning("logging mode not set to 'DEBUG', so not setting up debug formatting")
|
||||
return
|
||||
|
||||
if yf_logger.handlers is None or len(yf_logger.handlers) == 0:
|
||||
h = logging.StreamHandler()
|
||||
# Ensure different level strings don't interfere with indentation
|
||||
formatter = MultiLineFormatter(fmt='%(levelname)-8s %(message)s')
|
||||
h.setFormatter(formatter)
|
||||
yf_logger.addHandler(h)
|
||||
|
||||
global yf_log_indented
|
||||
yf_log_indented = True
|
||||
|
||||
def enable_debug_mode():
|
||||
get_yf_logger().setLevel(logging.DEBUG)
|
||||
setup_debug_formatting()
|
||||
|
||||
##
|
||||
|
||||
|
||||
def is_isin(string):
|
||||
return bool(_re.match("^([A-Z]{2})([A-Z0-9]{9})([0-9]{1})$", string))
|
||||
@@ -353,7 +441,7 @@ def _interval_to_timedelta(interval):
|
||||
elif interval == "1y":
|
||||
return _dateutil.relativedelta.relativedelta(years=1)
|
||||
elif interval == "1wk":
|
||||
return _pd.Timedelta(days=7, unit='d')
|
||||
return _pd.Timedelta(days=7)
|
||||
else:
|
||||
return _pd.Timedelta(interval)
|
||||
|
||||
@@ -567,11 +655,6 @@ def fix_Yahoo_returning_live_separate(quotes, interval, tz_exchange):
|
||||
|
||||
|
||||
def safe_merge_dfs(df_main, df_sub, interval):
|
||||
# Carefully merge 'df_sub' onto 'df_main'
|
||||
# If naive merge fails, try again with reindexing df_sub:
|
||||
# 1) if interval is weekly or monthly, then try with index set to start of week/month
|
||||
# 2) if still failing then manually search through df_main.index to reindex df_sub
|
||||
|
||||
if df_sub.shape[0] == 0:
|
||||
raise Exception("No data to merge")
|
||||
|
||||
@@ -581,6 +664,65 @@ def safe_merge_dfs(df_main, df_sub, interval):
|
||||
raise Exception("Expected 1 data col")
|
||||
data_col = data_cols[0]
|
||||
|
||||
df_main = df_main.sort_index()
|
||||
intraday = interval.endswith('m') or interval.endswith('s')
|
||||
|
||||
td = _interval_to_timedelta(interval)
|
||||
if intraday:
|
||||
# On some exchanges the event can occur before market open.
|
||||
# Problem when combining with intraday data.
|
||||
# Solution = use dates, not datetimes, to map/merge.
|
||||
df_main['_date'] = df_main.index.date
|
||||
df_sub['_date'] = df_sub.index.date
|
||||
indices = _np.searchsorted(_np.append(df_main['_date'], [df_main['_date'].iloc[-1]+td]), df_sub['_date'], side='left')
|
||||
df_main = df_main.drop('_date', axis=1)
|
||||
df_sub = df_sub.drop('_date', axis=1)
|
||||
else:
|
||||
indices = _np.searchsorted(_np.append(df_main.index, df_main.index[-1]+td), df_sub.index, side='right')
|
||||
indices -= 1 # Convert from [[i-1], [i]) to [[i], [i+1])
|
||||
# Numpy.searchsorted does not handle out-of-range well, so handle manually:
|
||||
for i in range(len(df_sub.index)):
|
||||
dt = df_sub.index[i]
|
||||
if dt < df_main.index[0] or dt >= df_main.index[-1]+td:
|
||||
# Out-of-range
|
||||
indices[i] = -1
|
||||
|
||||
f_outOfRange = indices == -1
|
||||
if f_outOfRange.any() and not intraday:
|
||||
# If dividend is occuring in next interval after last price row,
|
||||
# add a new row of NaNs
|
||||
last_dt = df_main.index[-1]
|
||||
next_interval_start_dt = last_dt + td
|
||||
if interval == '1d':
|
||||
# Allow for weekends & holidays
|
||||
next_interval_end_dt = last_dt+7*_pd.Timedelta(days=7)
|
||||
else:
|
||||
next_interval_end_dt = next_interval_start_dt + td
|
||||
for i in _np.where(f_outOfRange)[0]:
|
||||
dt = df_sub.index[i]
|
||||
if dt >= next_interval_start_dt and dt < next_interval_end_dt:
|
||||
new_dt = dt if interval == '1d' else next_interval_start_dt
|
||||
get_yf_logger().debug(f"Adding out-of-range {data_col} @ {dt.date()} in new prices row of NaNs")
|
||||
df_main.loc[new_dt] = _np.nan
|
||||
|
||||
# Re-calculate indices
|
||||
indices = _np.searchsorted(_np.append(df_main.index, df_main.index[-1]+td), df_sub.index, side='right')
|
||||
indices -= 1 # Convert from [[i-1], [i]) to [[i], [i+1])
|
||||
# Numpy.searchsorted does not handle out-of-range well, so handle manually:
|
||||
for i in range(len(df_sub.index)):
|
||||
dt = df_sub.index[i]
|
||||
if dt < df_main.index[0] or dt >= df_main.index[-1]+td:
|
||||
# Out-of-range
|
||||
indices[i] = -1
|
||||
|
||||
f_outOfRange = indices == -1
|
||||
if f_outOfRange.any():
|
||||
if intraday or interval in ['1d', '1wk']:
|
||||
raise Exception(f"The following '{data_col}' events are out-of-range, did not expect with interval {interval}: {df_sub.index}")
|
||||
get_yf_logger().debug(f'Discarding these {data_col} events:' + '\n' + str(df_sub[f_outOfRange]))
|
||||
df_sub = df_sub[~f_outOfRange].copy()
|
||||
indices = indices[~f_outOfRange]
|
||||
|
||||
def _reindex_events(df, new_index, data_col_name):
|
||||
if len(new_index) == len(set(new_index)):
|
||||
# No duplicates, easy
|
||||
@@ -602,106 +744,14 @@ def safe_merge_dfs(df_main, df_sub, interval):
|
||||
if "_NewIndex" in df.columns:
|
||||
df = df.drop("_NewIndex", axis=1)
|
||||
return df
|
||||
|
||||
df = df_main.join(df_sub)
|
||||
|
||||
f_na = df[data_col].isna()
|
||||
data_lost = sum(~f_na) < df_sub.shape[0]
|
||||
if not data_lost:
|
||||
return df
|
||||
# Lost data during join()
|
||||
# Backdate all df_sub.index dates to start of week/month
|
||||
if interval == "1wk":
|
||||
new_index = _pd.PeriodIndex(df_sub.index, freq='W').to_timestamp()
|
||||
elif interval == "1mo":
|
||||
new_index = _pd.PeriodIndex(df_sub.index, freq='M').to_timestamp()
|
||||
elif interval == "3mo":
|
||||
new_index = _pd.PeriodIndex(df_sub.index, freq='Q').to_timestamp()
|
||||
else:
|
||||
new_index = None
|
||||
|
||||
if new_index is not None:
|
||||
new_index = new_index.tz_localize(df.index.tz, ambiguous=True, nonexistent='shift_forward')
|
||||
df_sub = _reindex_events(df_sub, new_index, data_col)
|
||||
df = df_main.join(df_sub)
|
||||
|
||||
f_na = df[data_col].isna()
|
||||
data_lost = sum(~f_na) < df_sub.shape[0]
|
||||
if not data_lost:
|
||||
return df
|
||||
# Lost data during join(). Manually check each df_sub.index date against df_main.index to
|
||||
# find matching interval
|
||||
df_sub = df_sub_backup.copy()
|
||||
new_index = [-1] * df_sub.shape[0]
|
||||
for i in range(df_sub.shape[0]):
|
||||
dt_sub_i = df_sub.index[i]
|
||||
if dt_sub_i in df_main.index:
|
||||
new_index[i] = dt_sub_i
|
||||
continue
|
||||
# Found a bad index date, need to search for near-match in df_main (same week/month)
|
||||
fixed = False
|
||||
for j in range(df_main.shape[0] - 1):
|
||||
dt_main_j0 = df_main.index[j]
|
||||
dt_main_j1 = df_main.index[j + 1]
|
||||
if (dt_main_j0 <= dt_sub_i) and (dt_sub_i < dt_main_j1):
|
||||
fixed = True
|
||||
if interval.endswith('h') or interval.endswith('m'):
|
||||
# Must also be same day
|
||||
fixed = (dt_main_j0.date() == dt_sub_i.date()) and (dt_sub_i.date() == dt_main_j1.date())
|
||||
if fixed:
|
||||
dt_sub_i = dt_main_j0
|
||||
break
|
||||
if not fixed:
|
||||
last_main_dt = df_main.index[df_main.shape[0] - 1]
|
||||
diff = dt_sub_i - last_main_dt
|
||||
if interval == "1mo" and last_main_dt.month == dt_sub_i.month:
|
||||
dt_sub_i = last_main_dt
|
||||
fixed = True
|
||||
elif interval == "3mo" and last_main_dt.year == dt_sub_i.year and last_main_dt.quarter == dt_sub_i.quarter:
|
||||
dt_sub_i = last_main_dt
|
||||
fixed = True
|
||||
elif interval == "1wk":
|
||||
if last_main_dt.week == dt_sub_i.week:
|
||||
dt_sub_i = last_main_dt
|
||||
fixed = True
|
||||
elif (dt_sub_i >= last_main_dt) and (dt_sub_i - last_main_dt < _datetime.timedelta(weeks=1)):
|
||||
# With some specific start dates (e.g. around early Jan), Yahoo
|
||||
# messes up start-of-week, is Saturday not Monday. So check
|
||||
# if same week another way
|
||||
dt_sub_i = last_main_dt
|
||||
fixed = True
|
||||
elif interval == "1d" and last_main_dt.day == dt_sub_i.day:
|
||||
dt_sub_i = last_main_dt
|
||||
fixed = True
|
||||
elif interval == "1h" and last_main_dt.hour == dt_sub_i.hour:
|
||||
dt_sub_i = last_main_dt
|
||||
fixed = True
|
||||
elif interval.endswith('m') or interval.endswith('h'):
|
||||
td = _pd.to_timedelta(interval)
|
||||
if (dt_sub_i >= last_main_dt) and (dt_sub_i - last_main_dt < td):
|
||||
dt_sub_i = last_main_dt
|
||||
fixed = True
|
||||
new_index[i] = dt_sub_i
|
||||
new_index = df_main.index[indices]
|
||||
df_sub = _reindex_events(df_sub, new_index, data_col)
|
||||
df = df_main.join(df_sub)
|
||||
|
||||
df = df_main.join(df_sub)
|
||||
f_na = df[data_col].isna()
|
||||
data_lost = sum(~f_na) < df_sub.shape[0]
|
||||
if data_lost:
|
||||
## Not always possible to match events with trading, e.g. when released pre-market.
|
||||
## So have to append to bottom with nan prices.
|
||||
## But should only be impossible with intra-day price data.
|
||||
if interval.endswith('m') or interval.endswith('h') or interval == "1d":
|
||||
# Update: is possible with daily data when dividend very recent
|
||||
f_missing = ~df_sub.index.isin(df.index)
|
||||
df_sub_missing = df_sub[f_missing].copy()
|
||||
keys = {"Adj Open", "Open", "Adj High", "High", "Adj Low", "Low", "Adj Close",
|
||||
"Close"}.intersection(df.columns)
|
||||
df_sub_missing[list(keys)] = _np.nan
|
||||
col_ordering = df.columns
|
||||
df = _pd.concat([df, df_sub_missing], sort=True)[col_ordering]
|
||||
else:
|
||||
raise Exception("Lost data during merge despite all attempts to align data (see above)")
|
||||
raise Exception('Data was lost in merge, investigate')
|
||||
|
||||
return df
|
||||
|
||||
@@ -867,14 +917,21 @@ class _KVStore:
|
||||
|
||||
def get(self, key: str) -> Union[str, None]:
|
||||
"""Get value for key if it exists else returns None"""
|
||||
item = self.conn.execute('select value from "kv" where key=?', (key,))
|
||||
try:
|
||||
item = self.conn.execute('select value from "kv" where key=?', (key,))
|
||||
except _sqlite3.IntegrityError as e:
|
||||
self.delete(key)
|
||||
return None
|
||||
if item:
|
||||
return next(item, (None,))[0]
|
||||
|
||||
def set(self, key: str, value: str) -> None:
|
||||
with self._cache_mutex:
|
||||
self.conn.execute('replace into "kv" (key, value) values (?,?)', (key, value))
|
||||
self.conn.commit()
|
||||
if value is None:
|
||||
self.delete(key)
|
||||
else:
|
||||
with self._cache_mutex:
|
||||
self.conn.execute('replace into "kv" (key, value) values (?,?)', (key, value))
|
||||
self.conn.commit()
|
||||
|
||||
def bulk_set(self, kvdata: Dict[str, str]):
|
||||
records = tuple(i for i in kvdata.items())
|
||||
@@ -898,7 +955,11 @@ class _TzCache:
|
||||
def __init__(self):
|
||||
self._setup_cache_folder()
|
||||
# Must init db here, where is thread-safe
|
||||
self._tz_db = _KVStore(_os.path.join(self._db_dir, "tkr-tz.db"))
|
||||
try:
|
||||
self._tz_db = _KVStore(_os.path.join(self._db_dir, "tkr-tz.db"))
|
||||
except _sqlite3.DatabaseError as err:
|
||||
raise _TzCacheException("Error creating TzCache folder: '{}' reason: {}"
|
||||
.format(self._db_dir, err))
|
||||
self._migrate_cache_tkr_tz()
|
||||
|
||||
def _setup_cache_folder(self):
|
||||
@@ -946,7 +1007,17 @@ class _TzCache:
|
||||
except TypeError:
|
||||
_os.remove(old_cache_file_path)
|
||||
else:
|
||||
self.tz_db.bulk_set(df.to_dict()['Tz'])
|
||||
# Discard corrupt data:
|
||||
df = df[~df["Tz"].isna().to_numpy()]
|
||||
df = df[~(df["Tz"]=='').to_numpy()]
|
||||
df = df[~df.index.isna()]
|
||||
if not df.empty:
|
||||
try:
|
||||
self.tz_db.bulk_set(df.to_dict()['Tz'])
|
||||
except Exception as e:
|
||||
# Ignore
|
||||
pass
|
||||
|
||||
_os.remove(old_cache_file_path)
|
||||
|
||||
|
||||
@@ -977,10 +1048,10 @@ def get_tz_cache():
|
||||
try:
|
||||
_tz_cache = _TzCache()
|
||||
except _TzCacheException as err:
|
||||
logger.error("Failed to create TzCache, reason: %s. "
|
||||
"TzCache will not be used. "
|
||||
"Tip: You can direct cache to use a different location with 'set_tz_cache_location(mylocation)'",
|
||||
err)
|
||||
get_yf_logger().info("Failed to create TzCache, reason: %s. "
|
||||
"TzCache will not be used. "
|
||||
"Tip: You can direct cache to use a different location with 'set_tz_cache_location(mylocation)'",
|
||||
err)
|
||||
_tz_cache = _TzCacheDummy()
|
||||
|
||||
return _tz_cache
|
||||
|
||||
@@ -1 +1 @@
|
||||
version = "0.2.19b4"
|
||||
version = "0.2.24"
|
||||
|
||||
Reference in New Issue
Block a user