Compare commits

...

129 Commits

Author SHA1 Message Date
ValueRaider
efd278a3e0 Relax requests_cache purging - allow empty earnings calendar table 2023-02-07 20:38:51 +00:00
ValueRaider
4d8ca3777a Refactor check_Yahoo_response() to work with latest decryption 2023-02-07 20:37:06 +00:00
ValueRaider
14c6136699 Merge branch 'dev' into feature/session-prune-v2 2023-02-07 13:38:32 +00:00
ValueRaider
b462836540 Merge pull request #1385 from ranaroussi/fix/download-tz-behaviour
Restore original download() timezone handling
2023-02-07 13:16:03 +00:00
ValueRaider
645cc19037 Merge pull request #1379 from ranaroussi/feature/improve-decrypt
Add another backup decrypt option
2023-02-06 22:24:22 +00:00
ValueRaider
86d6acccf7 Fix dumb bugs in price repair - 1 more 2023-02-05 18:17:47 +00:00
ValueRaider
4fa32a98ed Merge pull request #1397 from Matt-Seath/dev
Catch TypeError Exception
2023-02-05 13:49:48 +00:00
Matt Seath
35f4071c0b Catch TypeError Exception
Addresses recent issue where calling Ticker.info would occasionally result in a TypeError Exception at line 287.
2023-02-05 11:49:40 +10:00
ValueRaider
86b00091a9 Fix dumb bugs in price repair 2023-02-02 21:57:55 +00:00
ValueRaider
2a2928b4a0 Fix 'tradingPeriods' parsing when empty - 0.2.10b2 2023-02-01 13:31:54 +00:00
ValueRaider
d47133e5bf Dev version 0.2.10b1 2023-01-31 22:12:11 +00:00
ValueRaider
8f0c58dafa Dev version 0.2.10b0 2023-01-31 22:02:41 +00:00
ValueRaider
27a721c7dd Merge pull request #1380 from ranaroussi/fix/old-sqlite-error
Allow using sqlite3 < 3.8.2
2023-01-31 19:52:22 +00:00
ValueRaider
3e964d5319 Merge pull request #1383 from ranaroussi/fix/fast-info-prepost
Fix fast_info["previousClose"]
2023-01-31 19:51:46 +00:00
ValueRaider
84a31ae0b4 Merge pull request #1311 from ranaroussi/feature/prices-metadata-prune-prepost
Drop intraday intervals if in post-market but prepost=False
2023-01-31 19:50:00 +00:00
ValueRaider
891b533ec2 Drop intraday intervals if in prepost but prepost=False 2023-01-31 19:48:47 +00:00
ValueRaider
b9fb3e4979 Restore original download() tz handling: day/week/etc = ignore 2023-01-31 00:00:45 +00:00
ValueRaider
09342982a4 Add 'quoteType'. Improve handling tickers without trading 2023-01-30 23:53:06 +00:00
ValueRaider
da8c49011e fast_info: Fix previousClose & yearChange 2023-01-30 16:06:55 +00:00
ValueRaider
b805f0a010 Add another backup decrypt option 2023-01-29 23:09:45 +00:00
ValueRaider
5b0feb3d20 Fix tests 2023-01-29 16:53:26 +00:00
ValueRaider
c3d7449844 Merge pull request #1289 from ranaroussi/fix/price-repair
Fix & improve price repair
2023-01-29 13:02:48 +00:00
ValueRaider
a4f11b0243 Fix price repair tests, remove unrelated changes 2023-01-29 13:01:54 +00:00
ValueRaider
464b3333d7 Allow using sqlite3 < 3.8.2 2023-01-29 00:34:46 +00:00
ValueRaider
685f2ec351 Merge branch 'dev' into fix/price-repair 2023-01-28 23:26:56 +00:00
ValueRaider
aad46baf28 price repair: Fix 'min_dt', add 'silent' mode 2023-01-28 23:14:28 +00:00
ValueRaider
af5f96f97e Merge pull request #1368 from ranaroussi/fix/fast-info-camel-case
`fast_info` usability improvements
2023-01-28 22:28:42 +00:00
ValueRaider
a4bdaea888 fast_info: add camelCase, items() & values() 2023-01-28 22:27:51 +00:00
ValueRaider
ac5a9d2793 Merge pull request #1367 from ranaroussi/main
main -> dev
2023-01-27 22:09:59 +00:00
ValueRaider
b17ad32a47 Merge pull request #1366 from ranaroussi/doc/readme-explain-instability
README: comment on instability, tidy Ticker 'Quick start'
2023-01-27 18:31:32 +00:00
ValueRaider
af39855e28 README: comment on instability, tidy Ticker 'Quick start' 2023-01-27 17:36:25 +00:00
ValueRaider
ac6e047f0d Bump version to 0.2.9 2023-01-26 22:21:46 +00:00
ValueRaider
1e24337f29 Bump version to 0.2.8 2023-01-26 22:20:11 +00:00
ValueRaider
2cc82ae12f Merge pull request #1362 from ranaroussi/hotfix/fast-info-bugs
Ticker.fast_info: fix teething bugs
2023-01-26 22:03:06 +00:00
ValueRaider
d11f385049 Make fast_info JSON-serializable via toJSON() 2023-01-26 21:45:53 +00:00
ValueRaider
7377611e1f Add 'get(key, default)' to fast_info 2023-01-26 21:23:31 +00:00
ValueRaider
f3b5fb85c9 Remove exception raise from 'get_shares_full()' 2023-01-26 21:14:48 +00:00
ValueRaider
a4faef83ac 'fast_info' fixes: unusual symbols ; improve migration message ; 'regular_market_previous_close' 2023-01-26 21:02:18 +00:00
ValueRaider
e1184f745b Update yahoo-keys.txt 2023-01-26 17:06:03 +00:00
ValueRaider
fe630008e9 Bump version to 0.2.7 2023-01-26 17:03:00 +00:00
ValueRaider
b43072cf0a Merge pull request #1354 from ranaroussi/hotfix/rename-basic-info
Rename 'basic_info' -> 'fast_info'
2023-01-26 17:00:54 +00:00
ValueRaider
ad3f4cabc9 Improve 'get_shares_full()' error handling 2023-01-26 16:58:26 +00:00
ValueRaider
f70567872c Merge pull request #1353 from ranaroussi/hotfix/smart-decryption
Add decrypt key extraction from JS + GitHub backup
2023-01-26 16:44:23 +00:00
ValueRaider
a8ade72113 Rename 'basic_info' -> 'fast_info' ; Fix info tests 2023-01-26 16:36:25 +00:00
ValueRaider
1dcc8c9c8b Remove dead debug code 2023-01-26 14:57:15 +00:00
ValueRaider
dd5462b307 Add decrypt key extraction from JS + GitHub backup 2023-01-26 14:52:18 +00:00
ValueRaider
e39c03e8e3 Hardcode decrypt keys in GitHub for fix w/o PIP
`yfinance` will query this file via web request as a last resort. Avoids having to release a new PIP version just for a key update.
2023-01-26 14:20:03 +00:00
ValueRaider
9297504b84 Merge pull request #1346 from ranaroussi/main
main -> dev sync
2023-01-25 22:16:22 +00:00
ValueRaider
3971115ab9 Bump version to 0.2.6 2023-01-25 19:10:31 +00:00
ValueRaider
b5badbbc61 Merge pull request #1342 from ranaroussi/hotfix/basic_info
Fix 'Ticker.basic_info' lazy-loading
2023-01-25 19:09:37 +00:00
ValueRaider
ba8621f5be Fix Ticker.basic_info.keys() calling each method 2023-01-25 18:35:54 +00:00
ValueRaider
8e5c94a4eb Bump version to 0.2.5 2023-01-25 16:45:30 +00:00
ValueRaider
66a1c1a174 Merge pull request #1337 from ranaroussi/dev
dev -> main
2023-01-25 16:40:56 +00:00
ValueRaider
ab6214df79 Merge pull request #1336 from ranaroussi/hotfix/decryption
Hardcode decryption keys
2023-01-25 16:40:38 +00:00
ValueRaider
dc5d42c8e2 Add another key 2023-01-25 15:46:07 +00:00
ValueRaider
ab75495cd3 Hardcode decryption keys 2023-01-25 14:45:04 +00:00
ValueRaider
39c1ecc7a2 Improve price repair - reduce spam, improve data reliability
Extend 'reconstruct groups' to reduce Yahoo spam ; Extend fetch range to avoid first/last day irregularities ; Improve handling of 'max fetch days' Yahoo limit
2023-01-25 14:37:43 +00:00
ValueRaider
af7720668c Merge pull request #1328 from CollieIsCute/main
use dict comprehension to improve speed
2023-01-25 13:42:44 +00:00
Collie Tsai
9051fba601 use dict comprehension to improve speed 2023-01-25 21:15:54 +08:00
ValueRaider
03ea6acec0 Merge pull request #1317 from ranaroussi/feature/prune-info
`Ticker.basic_info` - fast but minimal alternative to `info[]`
2023-01-25 11:28:22 +00:00
ValueRaider
ddc93033d7 Reorder contents of bug_report.md 2023-01-23 11:53:00 +00:00
ValueRaider
eb6d830e2a Fix repair volume=0 ; Tidy code 2023-01-21 23:00:30 +00:00
ValueRaider
2b0ae5a6c1 Remove 'repair_intervals' 2023-01-21 16:58:45 +00:00
ValueRaider
1636839b67 Handle request to reconstruct 1m 2023-01-20 00:13:28 +00:00
ValueRaider
65b97d024b Improve reporting 2023-01-20 00:13:02 +00:00
ValueRaider
fb77d35863 Update README 2023-01-19 22:33:54 +00:00
ValueRaider
197d2968e3 Add 'repair_intervals', rename 'repair'->'repair_prices' 2023-01-19 22:19:16 +00:00
ValueRaider
7460dbea17 If reconstructing 1d interval with 1h, always request prepost 2023-01-19 22:18:46 +00:00
ValueRaider
b49fd797fc Fix & improve price repair
Fix repair calibration & volume=0 repair ; Extend repair to sub-hour ; Avoid attempting repair of mostly-NaN days
2023-01-19 22:18:46 +00:00
ValueRaider
6bd8fb2290 Improve test ; Add more keys to basic_info 2023-01-19 14:57:34 +00:00
ValueRaider
cd1e16ad9e Add test ; Fix 1y price stats 2023-01-19 00:37:17 +00:00
ValueRaider
3fd9ea2204 Remove more info[] keys - #2 2023-01-18 16:55:31 +00:00
ValueRaider
d5a1266cbe Remove more info[] keys 2023-01-17 20:13:32 +00:00
ValueRaider
89bbe8ad4c Override Ticker.basic_info __str__() 2023-01-17 19:49:42 +00:00
ValueRaider
e44c6f8b0e Add 'Ticker.basic_info' 2023-01-17 14:10:28 +00:00
ValueRaider
0ba810fda5 Improve 'history_metadata' formatting 2023-01-16 18:30:28 +00:00
ValueRaider
677bbfed8b Add Ticker.market_cap helper ; Tidy info[] blacklist 2023-01-16 11:23:35 +00:00
ValueRaider
97671b78dd Move info migrate msgs from 'is in' to '[]' 2023-01-14 23:11:02 +00:00
ValueRaider
2865c0df9f Prune info[] with migration instructions
Remove redundant keys from info[] that are better found elsewhere ; Print instructions if old keys accessed via InfoDictWrapper
2023-01-14 23:07:04 +00:00
ValueRaider
0c037ddd12 Bump version to 0.2.4 2023-01-14 22:58:53 +00:00
ValueRaider
3ee4674098 Merge pull request #1302 from ranaroussi/dev
dev -> main
2023-01-14 22:58:33 +00:00
ValueRaider
5d9a91da4a Improve 'get_shares_full()' error handling ; Minor fixes 2023-01-14 22:44:54 +00:00
ValueRaider
47c579ff22 Merge pull request #1297 from alexa-infra/fix-stores-decryption
Fix stores decrypt
2023-01-14 20:06:52 +00:00
ValueRaider
caf5cba801 Merge pull request #1301 from ranaroussi/feature/share-count
Feature/share count
2023-01-14 19:53:45 +00:00
ValueRaider
486c7894ce get_shares_full(): convert to pd.Series, add test 2023-01-14 17:32:54 +00:00
ValueRaider
db8a00edae get_shares_full(): remove caching, tidy API 2023-01-14 17:11:57 +00:00
ValueRaider
805523b924 Fix 'get_shares_full()' post-rebase 2023-01-14 16:58:58 +00:00
ValueRaider
32ab2e648d get_shares_full() set default range 1yr 2023-01-14 16:35:54 +00:00
ValueRaider
4d91ae740a Add date args to 'shares_full()' and caching 2023-01-14 16:35:54 +00:00
ValueRaider
05ec4b4312 Add full share count history via 'shares_full' 2023-01-14 16:35:51 +00:00
ValueRaider
cd2c1ada14 Improve decrypt key deduction 2023-01-14 15:41:33 +00:00
ValueRaider
4ca9642403 Ensure 'requests_cache' responses processed ; Improve naming 2023-01-14 14:20:40 +00:00
Alexey Vasilyev
b438f29a71 Fix decryption 2023-01-14 08:06:35 +01:00
ValueRaider
4db178b8d6 Merge pull request #1284 from ranaroussi/fix/financials-caching
Improve caching of financials data
2023-01-12 11:47:04 +00:00
ValueRaider
38637a9821 Merge pull request #1283 from DE0CH/ignore-tz-false
Change default value to ignore_tz to False
2023-01-08 12:45:00 +00:00
Deyao Chen
de8c0bdcdd Change default value to ignore_tz to False
Bring the behavior of download() to be the same as 0.1.77.
2023-01-08 11:47:13 +08:00
ValueRaider
fd35975cf9 Improve caching of financials data 2023-01-07 18:02:16 +00:00
ValueRaider
1495834a09 Merge pull request #1276 from gogog22510/main
Fix the database lock error in multithread download
2023-01-04 23:10:22 +00:00
ValueRaider
2a7588dead Tidy DB lock fix 2023-01-04 21:32:54 +00:00
gogog22510
051de748b9 Fix the database lock error in multithread download 2023-01-04 12:37:59 -05:00
ValueRaider
8b9faf15b3 Move requests_cache pruning into hook, enable-by-default 2022-12-22 13:59:49 +00:00
ValueRaider
71362f2252 Tests: rename 'dat' -> 'ticker' 2022-12-21 21:56:23 +00:00
ValueRaider
287cb0786e Simplify midnight calculation 2022-12-21 21:51:23 +00:00
ValueRaider
0840b602b4 Default disable requests_cache pruning ; Add a prune check ; Improve doc 2022-12-21 21:38:08 +00:00
ValueRaider
6c0b4ddb7b Rebase 'session-prune-v2' to 'dev' 2022-12-21 21:34:18 +00:00
ValueRaider
97adb30d41 Merge pull request #1262 from ranaroussi/main
Sync `main` -> `dev`
2022-12-20 20:42:10 +00:00
ValueRaider
eacfbc45c0 Bump version to 0.2.3 2022-12-20 11:57:04 +00:00
ValueRaider
8deddd7ee9 Make financials API '_' use consistent 2022-12-20 11:56:57 +00:00
ValueRaider
beb494b67e README: add small section on version 0.2 2022-12-20 11:37:16 +00:00
ValueRaider
e2948a8b48 Bump version to 0.2.2 2022-12-20 11:33:04 +00:00
ValueRaider
ff3d3f2f78 Restore 'financials' attribute (map to 'income_stmt') 2022-12-20 11:32:19 +00:00
ValueRaider
85783da515 README: update 'repair' doc 2022-12-19 23:30:29 +00:00
ValueRaider
9dbfad4294 Bump version to 0.2.1 2022-12-19 23:19:42 +00:00
ValueRaider
5e54b92efd Fix _reconstruct_intervals_batch() calibration bug 2022-12-19 18:09:06 +00:00
ValueRaider
cffdbd47b5 Merge pull request #1253 from Rogach/pr/decode-stores
decode encrypted root.App.main.context.dispatcher.stores
2022-12-19 12:29:57 +00:00
ValueRaider
f398f46509 Switch 'pycryptodome' -> 'cryptography' 2022-12-19 12:28:51 +00:00
ValueRaider
097c76aa46 Add 'pycryptodome' requirement 2022-12-18 13:26:12 +00:00
ValueRaider
a9da16e048 Fix get_json_data_stores() behaviour 2022-12-18 13:19:11 +00:00
Platon Pronko
8e5f0984af decode encrypted root.App.main.context.dispatcher.stores 2022-12-18 11:40:26 +04:00
ValueRaider
38b738e766 Bump version to 0.2.0rc5 2022-12-16 16:27:46 +00:00
ValueRaider
55772d30a4 Merge pull request #1245 from ranaroussi/dev
Merge dev -> main for release 0.2.0rc5
2022-12-16 16:25:36 +00:00
ValueRaider
382285cfd9 Remove hardcoded paths 2022-12-16 16:24:16 +00:00
ValueRaider
d2e5ce284e Merge pull request #1243 from ranaroussi/fix/financials-error-handling
Improve financials error handling
2022-12-16 16:20:25 +00:00
ValueRaider
88d21d742d Merge pull request #1244 from ranaroussi/fix/repair-100x
Fix '100x price' repair
2022-12-16 16:20:17 +00:00
ValueRaider
7a0356d47b Document financials get() methods 2022-12-16 16:19:37 +00:00
ValueRaider
a13bf0cd6c Hide divide-by-0 warnings 2022-12-16 15:05:38 +00:00
ValueRaider
7cacf233ce Improve financials error handling
Nicely intercept parse errors in get_json_data_stores() & _create_financials_table_old() ; Improve exception messages ; Fix typo 'YFiance'
2022-12-16 13:22:17 +00:00
ValueRaider
b48212e420 Repair-100x now tolerates zeroes 2022-12-14 21:16:16 +00:00
ValueRaider
e7bf3607e8 Fix tests 2022-12-13 21:41:46 +00:00
20 changed files with 2267 additions and 346 deletions

View File

@@ -23,20 +23,20 @@ and comparing against [PIP](https://pypi.org/project/yfinance/#history).
### Does Yahoo actually have the data?
Visit `finance.yahoo.com` and confirm they have your data. Maybe your ticker was delisted.
Are you spelling the ticker *exactly* the same as Yahoo?
Then check that you are spelling the ticker *exactly* the same as Yahoo.
Visit `finance.yahoo.com` and confirm they have your data. Maybe your ticker was delisted.
### Are you spamming Yahoo?
Yahoo Finance's free service has a limit on query rate (roughly 100/s). Yahoo delaying or blocking your spam is not a bug.
Yahoo Finance's free service has a limit on query rate that depends on the request - roughly 500/minute for prices, 10/minute for info. Yahoo delaying or blocking your spam is not a bug.
### Still think it's a bug?
Delete this default message and submit your bug report here, providing the following as best you can:
- Simple code that reproduces your problem
- Error message, with traceback if shown
- Info about your system:
- yfinance version
- operating system
- Simple code that reproduces your problem
- The error message

View File

@@ -1,6 +1,49 @@
Change Log
===========
0.2.9
-----
- Fix fast_info bugs #1362
0.2.7
-----
- Fix Yahoo decryption, smarter this time #1353
- Rename basic_info -> fast_info #1354
0.2.6
-----
- Fix Ticker.basic_info lazy-loading #1342
0.2.5
-----
- Fix Yahoo data decryption again #1336
- New: Ticker.basic_info - faster Ticker.info #1317
0.2.4
-----
- Fix Yahoo data decryption #1297
- New feature: 'Ticker.get_shares_full()' #1301
- Improve caching of financials data #1284
- Restore download() original alignment behaviour #1283
- Fix the database lock error in multithread download #1276
0.2.3
-----
- Make financials API '_' use consistent
0.2.2
-----
- Restore 'financials' attribute (map to 'income_stmt')
0.2.1
-----
Release!
0.2.0rc5
--------
- Improve financials error handling #1243
- Fix '100x price' repair #1244
0.2.0rc4
--------
- Access to old financials tables via `get_income_stmt(legacy=True)`

View File

@@ -42,6 +42,11 @@ Yahoo! finance API is intended for personal use only.**
---
## News [2023-01-27]
Since December 2022 Yahoo has been encrypting the web data that `yfinance` scrapes for non-market data. Fortunately the decryption keys are available, although Yahoo moved/changed them several times, hence `yfinance` breaking several times. `yfinance` is now better prepared for any future changes by Yahoo.
Why is Yahoo doing this? We don't know. Is it to stop scrapers? Maybe, so we've implemented changes to reduce load on Yahoo. In December we rolled out version 0.2 with optimised scraping. Then in 0.2.6 we introduced `Ticker.fast_info`, providing much faster access to some `info` elements (e.g. price stats) wherever possible, and forcing users to switch (sorry, but we think it is necessary). `info` will continue to exist for as long as there are elements without a fast alternative.
## Quick Start
### The Ticker module
@@ -53,30 +58,28 @@ import yfinance as yf
msft = yf.Ticker("MSFT")
# get stock info
# get all stock info (slow)
msft.info
# fast access to subset of stock info (opportunistic)
msft.fast_info
# get historical market data
hist = msft.history(period="max")
hist = msft.history(period="1mo")
# show meta information about the history (requires history() to be called first)
msft.history_metadata
# show actions (dividends, splits, capital gains)
msft.actions
# show dividends
msft.dividends
# show splits
msft.splits
# show capital gains (for mutual funds & etfs)
msft.capital_gains
msft.capital_gains # only for mutual funds & etfs
# show share count
# - yearly summary:
msft.shares
# - accurate time-series count:
msft.get_shares_full(start="2022-01-01", end=None)
# show financials:
# - income statement
@@ -90,13 +93,9 @@ msft.cashflow
msft.quarterly_cashflow
# see `Ticker.get_income_stmt()` for more options
# show major holders
# show holders
msft.major_holders
# show institutional holders
msft.institutional_holders
# show mutualfund holders
msft.mutualfund_holders
# show earnings
@@ -155,19 +154,6 @@ msft.option_chain(..., proxy="PROXY_SERVER")
...
```
To use a custom `requests` session (for example to cache calls to the
API or customize the `User-agent` header), pass a `session=` argument to
the Ticker constructor.
```python
import requests_cache
session = requests_cache.CachedSession('yfinance.cache')
session.headers['User-agent'] = 'my-program/1.0'
ticker = yf.Ticker('msft', session=session)
# The scraped response will be stored in the cache
ticker.actions
```
To initialize multiple `Ticker` objects, use
```python
@@ -181,6 +167,29 @@ tickers.tickers['AAPL'].history(period="1mo")
tickers.tickers['GOOG'].actions
```
### Caching
Heavy users will quickly encounter Yahoo's rate limits on free use.
A `requests` session can help by caching web requests.
To use, pass a `session=` argument to the Ticker constructor:
```python
import requests_cache
session = requests_cache.CachedSession('yfinance.cache')
# session.headers['User-agent'] = 'my-program/1.0' # <- Optional
ticker = yf.Ticker('msft aapl goog', session=session)
# The scraped response will be stored in the cache
ticker.actions
```
To assist, `yfinance` removes requests from cache that failed to parse.
To disable this feature call `yfinance.disable_prune_session_cache()`.
Add expiration to the session to prune old data:
```python
session = requests_cache.CachedSession('yfinance.cache', expire_after=datetime.timedelta(minutes=60))
```
More info here: https://requests-cache.readthedocs.io/en/stable/user_guide/expiration.html
### Fetching data for multiple tickers
```python
@@ -206,8 +215,7 @@ data = yf.download( # or pdr.get_data_yahoo(...
interval = "5d",
# Whether to ignore timezone when aligning ticker data from
# different timezones. Default is True. False may be useful for
# minute/hourly data.
# different timezones. Default is False.
ignore_tz = False,
# group by ticker (to access via data['SPY'])
@@ -218,7 +226,7 @@ data = yf.download( # or pdr.get_data_yahoo(...
# (optional, default is False)
auto_adjust = True,
# identify and attempt repair of currency unit mixups e.g. $/cents
# attempt repair of Yahoo data issues
repair = False,
# download pre/post regular market hours data
@@ -306,6 +314,7 @@ To install `yfinance` using `conda`, see
- [frozendict](https://pypi.org/project/frozendict) \>= 2.3.4
- [beautifulsoup4](https://pypi.org/project/beautifulsoup4) \>= 4.11.1
- [html5lib](https://pypi.org/project/html5lib) \>= 1.1
- [cryptography](https://pypi.org/project/cryptography) \>= 3.3.2
### Optional (if you want to use `pandas_datareader`)

View File

@@ -1,5 +1,5 @@
{% set name = "yfinance" %}
{% set version = "0.2.0" %}
{% set version = "0.2.9" %}
package:
name: "{{ name|lower }}"
@@ -26,6 +26,8 @@ requirements:
- frozendict >=2.3.4
- beautifulsoup4 >=4.11.1
- html5lib >=1.1
# - pycryptodome >=3.6.6
- cryptography >=3.3.2
- pip
- python
@@ -40,6 +42,8 @@ requirements:
- frozendict >=2.3.4
- beautifulsoup4 >=4.11.1
- html5lib >=1.1
# - pycryptodome >=3.6.6
- cryptography >=3.3.2
- python
test:

View File

@@ -8,3 +8,4 @@ pytz>=2022.5
frozendict>=2.3.4
beautifulsoup4>=4.11.1
html5lib>=1.1
cryptography>=3.3.2

View File

@@ -62,7 +62,9 @@ setup(
install_requires=['pandas>=1.3.0', 'numpy>=1.16.5',
'requests>=2.26', 'multitasking>=0.0.7',
'lxml>=4.9.1', 'appdirs>=1.4.4', 'pytz>=2022.5',
'frozendict>=2.3.4',
'frozendict>=2.3.4',
# 'pycryptodome>=3.6.6',
'cryptography>=3.3.2',
'beautifulsoup4>=4.11.1', 'html5lib>=1.1'],
entry_points={
'console_scripts': [

View File

@@ -24,14 +24,12 @@ class TestPriceHistory(unittest.TestCase):
def test_daily_index(self):
tkrs = ["BHP.AX", "IMP.JO", "BP.L", "PNL.L", "INTC"]
intervals = ["1d", "1wk", "1mo"]
for tkr in tkrs:
dat = yf.Ticker(tkr, session=self.session)
ticker = yf.Ticker(tkr, session=self.session)
for interval in intervals:
df = dat.history(period="5y", interval=interval)
df = ticker.history(period="5y", interval=interval)
f = df.index.time == _dt.time(0)
self.assertTrue(f.all())
@@ -39,13 +37,14 @@ class TestPriceHistory(unittest.TestCase):
def test_duplicatingHourly(self):
tkrs = ["IMP.JO", "BHG.JO", "SSW.JO", "BP.L", "INTC"]
for tkr in tkrs:
dat = yf.Ticker(tkr, session=self.session)
tz = dat._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
ticker = yf.Ticker(tkr, session=self.session)
tz = ticker._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
dt_utc = _tz.timezone("UTC").localize(_dt.datetime.utcnow())
dt = dt_utc.astimezone(_tz.timezone(tz))
df = dat.history(start=dt.date() - _dt.timedelta(days=1), interval="1h")
start_d = dt.date() - _dt.timedelta(days=7)
df = ticker.history(start=start_d, interval="1h")
dt0 = df.index[-2]
dt1 = df.index[-1]
@@ -55,13 +54,12 @@ class TestPriceHistory(unittest.TestCase):
print("Ticker = ", tkr)
raise
def test_duplicatingDaily(self):
tkrs = ["IMP.JO", "BHG.JO", "SSW.JO", "BP.L", "INTC"]
test_run = False
for tkr in tkrs:
dat = yf.Ticker(tkr, session=self.session)
tz = dat._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
ticker = yf.Ticker(tkr, session=self.session)
tz = ticker._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
dt_utc = _tz.timezone("UTC").localize(_dt.datetime.utcnow())
dt = dt_utc.astimezone(_tz.timezone(tz))
@@ -69,7 +67,7 @@ class TestPriceHistory(unittest.TestCase):
continue
test_run = True
df = dat.history(start=dt.date() - _dt.timedelta(days=7), interval="1d")
df = ticker.history(start=dt.date() - _dt.timedelta(days=7), interval="1d")
dt0 = df.index[-2]
dt1 = df.index[-1]
@@ -86,15 +84,15 @@ class TestPriceHistory(unittest.TestCase):
tkrs = ['MSFT', 'IWO', 'VFINX', '^GSPC', 'BTC-USD']
test_run = False
for tkr in tkrs:
dat = yf.Ticker(tkr, session=self.session)
tz = dat._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
ticker = yf.Ticker(tkr, session=self.session)
tz = ticker._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
dt = _tz.timezone(tz).localize(_dt.datetime.now())
if dt.date().weekday() not in [1, 2, 3, 4]:
continue
test_run = True
df = dat.history(start=dt.date() - _dt.timedelta(days=7), interval="1wk")
df = ticker.history(start=dt.date() - _dt.timedelta(days=7), interval="1wk")
dt0 = df.index[-2]
dt1 = df.index[-1]
try:
@@ -110,22 +108,27 @@ class TestPriceHistory(unittest.TestCase):
def test_intraDayWithEvents(self):
# TASE dividend release pre-market, doesn't merge nicely with intra-day data so check still present
tkr = "ICL.TA"
# tkr = "ESLT.TA"
# tkr = "ONE.TA"
# tkr = "MGDL.TA"
start_d = _dt.date.today() - _dt.timedelta(days=60)
end_d = None
df_daily = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1d", actions=True)
df_daily_divs = df_daily["Dividends"][df_daily["Dividends"] != 0]
if df_daily_divs.shape[0] == 0:
self.skipTest("Skipping test_intraDayWithEvents() because 'ICL.TA' has no dividend in last 60 days")
tase_tkrs = ["ICL.TA", "ESLT.TA", "ONE.TA", "MGDL.TA"]
test_run = False
for tkr in tase_tkrs:
start_d = _dt.date.today() - _dt.timedelta(days=59)
end_d = None
df_daily = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1d", actions=True)
df_daily_divs = df_daily["Dividends"][df_daily["Dividends"] != 0]
if df_daily_divs.shape[0] == 0:
# self.skipTest("Skipping test_intraDayWithEvents() because 'ICL.TA' has no dividend in last 60 days")
continue
last_div_date = df_daily_divs.index[-1]
start_d = last_div_date.date()
end_d = last_div_date.date() + _dt.timedelta(days=1)
df = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="15m", actions=True)
self.assertTrue((df["Dividends"] != 0.0).any())
last_div_date = df_daily_divs.index[-1]
start_d = last_div_date.date()
end_d = last_div_date.date() + _dt.timedelta(days=1)
df = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="15m", actions=True)
self.assertTrue((df["Dividends"] != 0.0).any())
test_run = True
break
if not test_run:
self.skipTest("Skipping test_intraDayWithEvents() because no tickers had a dividend in last 60 days")
def test_dailyWithEvents(self):
# Reproduce issue #521
@@ -230,7 +233,6 @@ class TestPriceHistory(unittest.TestCase):
def test_tz_dst_ambiguous(self):
# Reproduce issue #1100
try:
yf.Ticker("ESLT.TA", session=self.session).history(start="2002-10-06", end="2002-10-09", interval="1d")
except _tz.exceptions.AmbiguousTimeError:
@@ -245,36 +247,178 @@ class TestPriceHistory(unittest.TestCase):
# The correction is successful if no days are weekend, and weekly data begins Monday
tkr = "AGRO3.SA"
dat = yf.Ticker(tkr, session=self.session)
ticker = yf.Ticker(tkr, session=self.session)
start = "2021-01-11"
end = "2022-11-05"
interval = "1d"
df = dat.history(start=start, end=end, interval=interval)
df = ticker.history(start=start, end=end, interval=interval)
self.assertTrue(((df.index.weekday >= 0) & (df.index.weekday <= 4)).all())
interval = "1wk"
df = dat.history(start=start, end=end, interval=interval)
df = ticker.history(start=start, end=end, interval=interval)
try:
self.assertTrue((df.index.weekday == 0).all())
except:
print("Weekly data not aligned to Monday")
raise
def test_prune_post_intraday_us(self):
# Half-day before USA Thanksgiving. Yahoo normally
# returns an interval starting when regular trading closes,
# even if prepost=False.
# Setup
tkr = "AMZN"
interval = "1h"
interval_td = _dt.timedelta(hours=1)
time_open = _dt.time(9, 30)
time_close = _dt.time(16)
special_day = _dt.date(2022, 11, 25)
time_early_close = _dt.time(13)
dat = yf.Ticker(tkr, session=self.session)
# Run
start_d = special_day - _dt.timedelta(days=7)
end_d = special_day + _dt.timedelta(days=7)
df = dat.history(start=start_d, end=end_d, interval=interval, prepost=False, keepna=True)
tg_last_dt = df.loc[str(special_day)].index[-1]
self.assertTrue(tg_last_dt.time() < time_early_close)
# Test no other afternoons (or mornings) were pruned
start_d = _dt.date(special_day.year, 1, 1)
end_d = _dt.date(special_day.year+1, 1, 1)
df = dat.history(start=start_d, end=end_d, interval="1h", prepost=False, keepna=True)
last_dts = _pd.Series(df.index).groupby(df.index.date).last()
f_early_close = (last_dts+interval_td).dt.time < time_close
early_close_dates = last_dts.index[f_early_close].values
self.assertEqual(len(early_close_dates), 1)
self.assertEqual(early_close_dates[0], special_day)
first_dts = _pd.Series(df.index).groupby(df.index.date).first()
f_late_open = first_dts.dt.time > time_open
late_open_dates = first_dts.index[f_late_open]
self.assertEqual(len(late_open_dates), 0)
def test_prune_post_intraday_omx(self):
# Half-day before Sweden Christmas. Yahoo normally
# returns an interval starting when regular trading closes,
# even if prepost=False.
# If prepost=False, test that yfinance is removing prepost intervals.
# Setup
tkr = "AEC.ST"
interval = "1h"
interval_td = _dt.timedelta(hours=1)
time_open = _dt.time(9)
time_close = _dt.time(17,30)
special_day = _dt.date(2022, 12, 23)
time_early_close = _dt.time(13, 2)
dat = yf.Ticker(tkr, session=self.session)
# Half trading day Jan 5, Apr 14, May 25, Jun 23, Nov 4, Dec 23, Dec 30
half_days = [_dt.date(special_day.year, x[0], x[1]) for x in [(1,5), (4,14), (5,25), (6,23), (11,4), (12,23), (12,30)]]
# Yahoo has incorrectly classified afternoon of 2022-04-13 as post-market.
# Nothing yfinance can do because Yahoo doesn't return data with prepost=False.
# But need to handle in this test.
expected_incorrect_half_days = [_dt.date(2022,4,13)]
half_days = sorted(half_days+expected_incorrect_half_days)
# Run
start_d = special_day - _dt.timedelta(days=7)
end_d = special_day + _dt.timedelta(days=7)
df = dat.history(start=start_d, end=end_d, interval=interval, prepost=False, keepna=True)
tg_last_dt = df.loc[str(special_day)].index[-1]
self.assertTrue(tg_last_dt.time() < time_early_close)
# Test no other afternoons (or mornings) were pruned
start_d = _dt.date(special_day.year, 1, 1)
end_d = _dt.date(special_day.year+1, 1, 1)
df = dat.history(start=start_d, end=end_d, interval="1h", prepost=False, keepna=True)
last_dts = _pd.Series(df.index).groupby(df.index.date).last()
f_early_close = (last_dts+interval_td).dt.time < time_close
early_close_dates = last_dts.index[f_early_close].values
unexpected_early_close_dates = [d for d in early_close_dates if not d in half_days]
self.assertEqual(len(unexpected_early_close_dates), 0)
self.assertEqual(len(early_close_dates), len(half_days))
self.assertTrue(_np.equal(early_close_dates, half_days).all())
first_dts = _pd.Series(df.index).groupby(df.index.date).first()
f_late_open = first_dts.dt.time > time_open
late_open_dates = first_dts.index[f_late_open]
self.assertEqual(len(late_open_dates), 0)
def test_prune_post_intraday_asx(self):
# Setup
tkr = "BHP.AX"
interval = "1h"
interval_td = _dt.timedelta(hours=1)
time_open = _dt.time(10)
time_close = _dt.time(16,12)
# No early closes in 2022
dat = yf.Ticker(tkr, session=self.session)
# Test no afternoons (or mornings) were pruned
start_d = _dt.date(2022, 1, 1)
end_d = _dt.date(2022+1, 1, 1)
df = dat.history(start=start_d, end=end_d, interval="1h", prepost=False, keepna=True)
last_dts = _pd.Series(df.index).groupby(df.index.date).last()
f_early_close = (last_dts+interval_td).dt.time < time_close
early_close_dates = last_dts.index[f_early_close].values
self.assertEqual(len(early_close_dates), 0)
first_dts = _pd.Series(df.index).groupby(df.index.date).first()
f_late_open = first_dts.dt.time > time_open
late_open_dates = first_dts.index[f_late_open]
self.assertEqual(len(late_open_dates), 0)
def test_weekly_2rows_fix(self):
tkr = "AMZN"
start = _dt.date.today() - _dt.timedelta(days=14)
start -= _dt.timedelta(days=start.weekday())
dat = yf.Ticker(tkr)
df = dat.history(start=start, interval="1wk")
ticker = yf.Ticker(tkr)
df = ticker.history(start=start, interval="1wk")
self.assertTrue((df.index.weekday == 0).all())
class TestPriceRepair(unittest.TestCase):
session = None
@classmethod
def setUpClass(cls):
cls.session = requests_cache.CachedSession(backend='memory')
@classmethod
def tearDownClass(cls):
if cls.session is not None:
cls.session.close()
def test_reconstruct_2m(self):
    """Smoke-test 2m price repair over a 60-day window for several exchanges.

    The test passes if ``history(..., repair=True)`` completes without
    raising for every ticker; the returned data is not inspected.
    """
    # 2m repair requires 1m data.
    # Yahoo restricts 1m fetches to 7 days max within last 30 days.
    # Need to test that '_reconstruct_intervals_batch()' can handle this.

    tkrs = ["BHP.AX", "IMP.JO", "BP.L", "PNL.L", "INTC"]

    # Round time for 'requests_cache' reuse
    end_dt = _pd.Timestamp.utcnow().ceil("1h")
    start_dt = end_dt - _dt.timedelta(days=60)

    for tkr in tkrs:
        dat = yf.Ticker(tkr, session=self.session)
        dat.history(start=start_dt, end=end_dt, interval="2m", repair=True)
def test_repair_100x_weekly(self):
# Setup:
tkr = "PNL.L"
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.info["exchangeTimezoneName"]
tz_exchange = ticker.fast_info["timezone"]
data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
df = _pd.DataFrame(data={"Open": [470.5, 473.5, 474.5, 470],
@@ -283,22 +427,22 @@ class TestPriceHistory(unittest.TestCase):
"Close": [475, 473.5, 472, 473.5],
"Adj Close": [475, 473.5, 472, 473.5],
"Volume": [2295613, 2245604, 3000287, 2635611]},
index=_pd.to_datetime([_dt.date(2022, 10, 23),
_dt.date(2022, 10, 16),
_dt.date(2022, 10, 9),
_dt.date(2022, 10, 2)]))
index=_pd.to_datetime([_dt.date(2022, 10, 24),
_dt.date(2022, 10, 17),
_dt.date(2022, 10, 10),
_dt.date(2022, 10, 3)]))
df = df.sort_index()
df.index.name = "Date"
df_bad = df.copy()
df_bad.loc["2022-10-23", "Close"] *= 100
df_bad.loc["2022-10-16", "Low"] *= 100
df_bad.loc["2022-10-2", "Open"] *= 100
df_bad.loc["2022-10-24", "Close"] *= 100
df_bad.loc["2022-10-17", "Low"] *= 100
df_bad.loc["2022-10-03", "Open"] *= 100
df.index = df.index.tz_localize(tz_exchange)
df_bad.index = df_bad.index.tz_localize(tz_exchange)
# Run test
df_repaired = dat._fix_unit_mixups(df_bad, "1wk", tz_exchange)
df_repaired = ticker._fix_unit_mixups(df_bad, "1wk", tz_exchange, prepost=False)
# First test - no errors left
for c in data_cols:
@@ -325,8 +469,9 @@ class TestPriceHistory(unittest.TestCase):
# PNL.L has a stock-split in 2022. Sometimes requesting data before 2022 is not split-adjusted.
tkr = "PNL.L"
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.info["exchangeTimezoneName"]
ticker = yf.Ticker(tkr, session=self.session)
tz_exchange = ticker.fast_info["timezone"]
data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
df = _pd.DataFrame(data={"Open": [400, 398, 392.5, 417],
@@ -353,7 +498,7 @@ class TestPriceHistory(unittest.TestCase):
df.index = df.index.tz_localize(tz_exchange)
df_bad.index = df_bad.index.tz_localize(tz_exchange)
df_repaired = dat._fix_unit_mixups(df_bad, "1wk", tz_exchange)
df_repaired = ticker._fix_unit_mixups(df_bad, "1wk", tz_exchange, prepost=False)
# First test - no errors left
for c in data_cols:
@@ -380,8 +525,8 @@ class TestPriceHistory(unittest.TestCase):
def test_repair_100x_daily(self):
tkr = "PNL.L"
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.info["exchangeTimezoneName"]
ticker = yf.Ticker(tkr, session=self.session)
tz_exchange = ticker.fast_info["timezone"]
data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
df = _pd.DataFrame(data={"Open": [478, 476, 476, 472],
@@ -403,7 +548,7 @@ class TestPriceHistory(unittest.TestCase):
df.index = df.index.tz_localize(tz_exchange)
df_bad.index = df_bad.index.tz_localize(tz_exchange)
df_repaired = dat._fix_unit_mixups(df_bad, "1d", tz_exchange)
df_repaired = ticker._fix_unit_mixups(df_bad, "1d", tz_exchange, prepost=False)
# First test - no errors left
for c in data_cols:
@@ -422,8 +567,9 @@ class TestPriceHistory(unittest.TestCase):
def test_repair_zeroes_daily(self):
tkr = "BBIL.L"
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.info["exchangeTimezoneName"]
ticker = yf.Ticker(tkr, session=self.session)
tz_exchange = ticker.fast_info["timezone"]
df_bad = _pd.DataFrame(data={"Open": [0, 102.04, 102.04],
"High": [0, 102.1, 102.11],
@@ -438,7 +584,7 @@ class TestPriceHistory(unittest.TestCase):
df_bad.index.name = "Date"
df_bad.index = df_bad.index.tz_localize(tz_exchange)
repaired_df = dat._fix_zeroes(df_bad, "1d", tz_exchange)
repaired_df = ticker._fix_zeroes(df_bad, "1d", tz_exchange, prepost=False)
correct_df = df_bad.copy()
correct_df.loc["2022-11-01", "Open"] = 102.080002
@@ -449,41 +595,32 @@ class TestPriceHistory(unittest.TestCase):
def test_repair_zeroes_hourly(self):
tkr = "INTC"
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.info["exchangeTimezoneName"]
ticker = yf.Ticker(tkr, session=self.session)
tz_exchange = ticker.fast_info["timezone"]
df_bad = _pd.DataFrame(data={"Open": [29.68, 29.49, 29.545, _np.nan, 29.485],
"High": [29.68, 29.625, 29.58, _np.nan, 29.49],
"Low": [29.46, 29.4, 29.45, _np.nan, 29.31],
"Close": [29.485, 29.545, 29.485, _np.nan, 29.325],
"Adj Close": [29.485, 29.545, 29.485, _np.nan, 29.325],
"Volume": [3258528, 2140195, 1621010, 0, 0]},
index=_pd.to_datetime([_dt.datetime(2022,11,25, 9,30),
_dt.datetime(2022,11,25, 10,30),
_dt.datetime(2022,11,25, 11,30),
_dt.datetime(2022,11,25, 12,30),
_dt.datetime(2022,11,25, 13,00)]))
df_bad = df_bad.sort_index()
df_bad.index.name = "Date"
df_bad.index = df_bad.index.tz_localize(tz_exchange)
correct_df = ticker.history(period="1wk", interval="1h", auto_adjust=False, repair=True)
repaired_df = dat._fix_zeroes(df_bad, "1h", tz_exchange)
df_bad = correct_df.copy()
bad_idx = correct_df.index[10]
df_bad.loc[bad_idx, "Open"] = _np.nan
df_bad.loc[bad_idx, "High"] = _np.nan
df_bad.loc[bad_idx, "Low"] = _np.nan
df_bad.loc[bad_idx, "Close"] = _np.nan
df_bad.loc[bad_idx, "Adj Close"] = _np.nan
df_bad.loc[bad_idx, "Volume"] = 0
repaired_df = ticker._fix_zeroes(df_bad, "1h", tz_exchange, prepost=False)
correct_df = df_bad.copy()
idx = _pd.Timestamp(2022,11,25, 12,30).tz_localize(tz_exchange)
correct_df.loc[idx, "Open"] = 29.485001
correct_df.loc[idx, "High"] = 29.49
correct_df.loc[idx, "Low"] = 29.43
correct_df.loc[idx, "Close"] = 29.455
correct_df.loc[idx, "Adj Close"] = 29.455
correct_df.loc[idx, "Volume"] = 609164
for c in ["Open", "Low", "High", "Close"]:
try:
self.assertTrue(_np.isclose(repaired_df[c], correct_df[c], rtol=1e-7).all())
except:
print("COLUMN", c)
print("- repaired_df")
print(repaired_df)
print("- correct_df[c]:")
print(correct_df[c])
print("- diff:")
print(repaired_df[c] - correct_df[c])
raise

View File

@@ -9,6 +9,7 @@ Specific test class:
"""
import pandas as pd
import numpy as np
from .context import yfinance as yf
@@ -43,8 +44,8 @@ class TestTicker(unittest.TestCase):
yf.utils.get_tz_cache().store(tkr, None)
# Test:
dat = yf.Ticker(tkr, session=self.session)
tz = dat._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
ticker = yf.Ticker(tkr, session=self.session)
tz = ticker._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
self.assertIsNotNone(tz)
@@ -52,85 +53,261 @@ class TestTicker(unittest.TestCase):
# Check yfinance doesn't die when ticker delisted
tkr = "AM2Z.TA"
dat = yf.Ticker(tkr, session=self.session)
dat.history(period="1wk")
dat.history(start="2022-01-01")
dat.history(start="2022-01-01", end="2022-03-01")
ticker = yf.Ticker(tkr, session=self.session)
ticker.history(period="1wk")
ticker.history(start="2022-01-01")
ticker.history(start="2022-01-01", end="2022-03-01")
yf.download([tkr], period="1wk")
dat.isin
dat.major_holders
dat.institutional_holders
dat.mutualfund_holders
dat.dividends
dat.splits
dat.actions
dat.shares
dat.info
dat.calendar
dat.recommendations
dat.earnings
dat.quarterly_earnings
dat.income_stmt
dat.quarterly_income_stmt
dat.balance_sheet
dat.quarterly_balance_sheet
dat.cashflow
dat.quarterly_cashflow
dat.recommendations_summary
dat.analyst_price_target
dat.revenue_forecasts
dat.sustainability
dat.options
dat.news
dat.earnings_trend
dat.earnings_dates
dat.earnings_forecasts
ticker.isin
ticker.major_holders
ticker.institutional_holders
ticker.mutualfund_holders
ticker.dividends
ticker.splits
ticker.actions
ticker.shares
ticker.get_shares_full()
ticker.info
ticker.calendar
ticker.recommendations
ticker.earnings
ticker.quarterly_earnings
ticker.income_stmt
ticker.quarterly_income_stmt
ticker.balance_sheet
ticker.quarterly_balance_sheet
ticker.cashflow
ticker.quarterly_cashflow
ticker.recommendations_summary
ticker.analyst_price_target
ticker.revenue_forecasts
ticker.sustainability
ticker.options
ticker.news
ticker.earnings_trend
ticker.earnings_dates
ticker.earnings_forecasts
def test_goodTicker(self):
    """Smoke-test the full Ticker API on a single instance of a valid symbol.

    Passes if none of the property accesses or history/download calls raise.
    """
    # that yfinance works when full api is called on same instance of ticker
    tkr = "IBM"
    ticker = yf.Ticker(tkr, session=self.session)

    ticker.isin
    ticker.major_holders
    ticker.institutional_holders
    ticker.mutualfund_holders
    ticker.dividends
    ticker.splits
    ticker.actions
    ticker.shares
    ticker.get_shares_full()
    ticker.info
    ticker.calendar
    ticker.recommendations
    ticker.earnings
    ticker.quarterly_earnings
    ticker.income_stmt
    ticker.quarterly_income_stmt
    ticker.balance_sheet
    ticker.quarterly_balance_sheet
    ticker.cashflow
    ticker.quarterly_cashflow
    ticker.recommendations_summary
    ticker.analyst_price_target
    ticker.revenue_forecasts
    ticker.sustainability
    ticker.options
    ticker.news
    ticker.earnings_trend
    ticker.earnings_dates
    ticker.earnings_forecasts

    ticker.history(period="1wk")
    ticker.history(start="2022-01-01")
    ticker.history(start="2022-01-01", end="2022-03-01")
    yf.download([tkr], period="1wk")
def test_session_pruning_goodTkr(self):
    """Check that successful responses for a valid ticker stay in requests_cache.

    Exercises the Ticker API with session-cache pruning enabled, recording
    the URL each group of calls is expected to hit, then asserts every one
    of those URLs is present in the session cache.
    """
    tkr = "IBM"
    ticker = yf.Ticker(tkr, session=self.session)

    # All requests should succeed, so all urls should be in cache
    yf.enable_prune_session_cache()

    # Common prefix of the fundamentals-timeseries urls. The trailing "..."
    # on those entries marks them for prefix matching below.
    ts_url = f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type="

    expected_urls = []

    ticker.history(period="1wk")
    ticker.dividends
    ticker.splits
    ticker.actions
    expected_urls.append(f"https://query2.finance.yahoo.com/v8/finance/chart/{tkr}?range=1wk&interval=1d&includePrePost=False&events=div%2Csplits%2CcapitalGains")

    ticker.info
    ticker.isin
    ticker.calendar
    ticker.recommendations
    ticker.recommendations_summary
    ticker.sustainability
    expected_urls.append(f"https://finance.yahoo.com/quote/{tkr}")

    ticker.analyst_price_target
    ticker.revenue_forecasts
    ticker.earnings_trend
    ticker.earnings_forecasts
    expected_urls.append(f"https://finance.yahoo.com/quote/{tkr}/analysis")

    ticker.major_holders
    ticker.institutional_holders
    ticker.mutualfund_holders
    expected_urls.append(f"https://finance.yahoo.com/quote/{tkr}/holders")

    ticker.shares
    ticker.earnings
    ticker.quarterly_earnings
    expected_urls.append(f"https://finance.yahoo.com/quote/{tkr}/financials")

    ticker.income_stmt
    expected_urls.append(ts_url + "annualTotalRevenue...")
    ticker.quarterly_income_stmt
    expected_urls.append(ts_url + "quarterlyTotalRevenue...")
    ticker.balance_sheet
    expected_urls.append(ts_url + "annualTotalAssets...")
    ticker.quarterly_balance_sheet
    expected_urls.append(ts_url + "quarterlyTotalAssets...")
    ticker.cashflow
    expected_urls.append(ts_url + "annualCashFlowsfromusedinOperatingActivitiesDirect...")
    ticker.quarterly_cashflow
    expected_urls.append(ts_url + "quarterlyCashFlowsfromusedinOperatingActivitiesDirect...")

    ticker.options
    expected_urls.append(f"https://query2.finance.yahoo.com/v7/finance/options/{tkr}")

    ticker.news
    expected_urls.append(f"https://query2.finance.yahoo.com/v1/finance/search?q={tkr}")

    ticker.earnings_dates
    expected_urls.append(f"https://finance.yahoo.com/calendar/earnings?symbol={tkr}&offset=0&size=12")

    for url in expected_urls:
        if url.endswith("..."):
            # This url is ridiculously long so just search for a partial match
            url_prefix = url.replace("...", "")
            in_cache = any(response.url.startswith(url_prefix)
                           for response in self.session.cache.filter())
        else:
            in_cache = self.session.cache.contains(url=url)
        self.assertTrue(in_cache, "This url missing from requests_cache: "+url)
def test_session_pruning_badTkr(self):
    """Check that failed responses for an invalid ticker are pruned from requests_cache.

    Exercises the Ticker API with session-cache pruning enabled, recording
    the URL each group of calls is expected to hit, then asserts none of
    those URLs is present in the session cache.
    """
    # Ideally would test a valid ticker after triggering a Yahoo block, but
    # deliberately getting blocked is not practical. As a proxy, use an
    # invalid ticker.
    tkr = "XYZ-X"
    ticker = yf.Ticker(tkr, session=self.session)

    # All requests should fail, so none of these urls should be in cache
    yf.enable_prune_session_cache()

    # Common prefix of the fundamentals-timeseries urls. The trailing "..."
    # on those entries marks them for prefix matching below.
    ts_url = f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{tkr}?symbol={tkr}&type="

    expected_urls = []

    ticker.history(period="1wk")
    ticker.dividends
    ticker.splits
    ticker.actions
    expected_urls.append(f"https://query2.finance.yahoo.com/v8/finance/chart/{tkr}?range=1wk&interval=1d&includePrePost=False&events=div%2Csplits%2CcapitalGains")

    ticker.info
    ticker.isin
    ticker.calendar
    ticker.recommendations
    ticker.recommendations_summary
    ticker.sustainability
    expected_urls.append(f"https://finance.yahoo.com/quote/{tkr}")

    ticker.analyst_price_target
    ticker.revenue_forecasts
    ticker.earnings_trend
    ticker.earnings_forecasts
    expected_urls.append(f"https://finance.yahoo.com/quote/{tkr}/analysis")

    ticker.major_holders
    ticker.institutional_holders
    ticker.mutualfund_holders
    expected_urls.append(f"https://finance.yahoo.com/quote/{tkr}/holders")

    ticker.shares
    ticker.earnings
    ticker.quarterly_earnings
    expected_urls.append(f"https://finance.yahoo.com/quote/{tkr}/financials")

    ticker.income_stmt
    expected_urls.append(ts_url + "annualTotalRevenue...")
    ticker.quarterly_income_stmt
    expected_urls.append(ts_url + "quarterlyTotalRevenue...")
    ticker.balance_sheet
    expected_urls.append(ts_url + "annualTotalAssets...")
    ticker.quarterly_balance_sheet
    expected_urls.append(ts_url + "quarterlyTotalAssets...")
    ticker.cashflow
    expected_urls.append(ts_url + "annualCashFlowsfromusedinOperatingActivitiesDirect...")
    ticker.quarterly_cashflow
    expected_urls.append(ts_url + "quarterlyCashFlowsfromusedinOperatingActivitiesDirect...")

    ticker.options
    expected_urls.append(f"https://query2.finance.yahoo.com/v7/finance/options/{tkr}")

    # Skip news, don't care if in cache
    # ticker.news
    # expected_urls.append(f"https://query2.finance.yahoo.com/v1/finance/search?q={tkr}")

    ticker.earnings_dates
    expected_urls.append(f"https://finance.yahoo.com/calendar/earnings?symbol={tkr}&offset=0&size=12")

    for url in expected_urls:
        if url.endswith("..."):
            # This url is ridiculously long so just search for a partial match
            url_prefix = url.replace("...", "")
            in_cache = any(response.url.startswith(url_prefix)
                           for response in self.session.cache.filter())
        else:
            in_cache = self.session.cache.contains(url=url)
        self.assertFalse(in_cache, "This url wrongly in requests_cache: "+url)
class TestTickerHistory(unittest.TestCase):
session = None
@classmethod
def setUpClass(cls):
cls.session = requests_cache.CachedSession(backend='memory')
@classmethod
def tearDownClass(cls):
if cls.session is not None:
cls.session.close()
def setUp(self):
# use a ticker that has dividends
self.ticker = yf.Ticker("IBM")
self.ticker = yf.Ticker("IBM", session=self.session)
def tearDown(self):
self.ticker = None
@@ -176,9 +353,19 @@ class TestTickerHistory(unittest.TestCase):
class TestTickerEarnings(unittest.TestCase):
session = None
@classmethod
def setUpClass(cls):
cls.session = requests_cache.CachedSession(backend='memory')
@classmethod
def tearDownClass(cls):
if cls.session is not None:
cls.session.close()
def setUp(self):
self.ticker = yf.Ticker("GOOGL")
self.ticker = yf.Ticker("GOOGL", session=self.session)
def tearDown(self):
self.ticker = None
@@ -237,9 +424,19 @@ class TestTickerEarnings(unittest.TestCase):
class TestTickerHolders(unittest.TestCase):
session = None
@classmethod
def setUpClass(cls):
cls.session = requests_cache.CachedSession(backend='memory')
@classmethod
def tearDownClass(cls):
if cls.session is not None:
cls.session.close()
def setUp(self):
self.ticker = yf.Ticker("GOOGL")
self.ticker = yf.Ticker("GOOGL", session=self.session)
def tearDown(self):
self.ticker = None
@@ -283,7 +480,7 @@ class TestTickerMiscFinancials(unittest.TestCase):
def setUp(self):
self.ticker = yf.Ticker("GOOGL", session=self.session)
# For ticker 'BSE.AX' (and others), Yahoo not returning
# full quarterly financials (usually cash-flow) with all entries,
# instead returns a smaller version in different data store.
@@ -497,6 +694,65 @@ class TestTickerMiscFinancials(unittest.TestCase):
data_cached = self.ticker_old_fmt.get_cashflow(legacy=True, freq="quarterly")
self.assertIs(data, data_cached, "data not cached")
def test_income_alt_names(self):
    """Every alias of the income statement must return equal data."""
    tkr = self.ticker

    # Annual, via properties
    annual = tkr.income_stmt
    self.assertTrue(annual.equals(tkr.incomestmt))
    self.assertTrue(annual.equals(tkr.financials))

    # Annual, via getters
    annual = tkr.get_income_stmt()
    self.assertTrue(annual.equals(tkr.get_incomestmt()))
    self.assertTrue(annual.equals(tkr.get_financials()))

    # Quarterly, via properties
    quarterly = tkr.quarterly_income_stmt
    self.assertTrue(quarterly.equals(tkr.quarterly_incomestmt))
    self.assertTrue(quarterly.equals(tkr.quarterly_financials))

    # Quarterly, via getters
    quarterly = tkr.get_income_stmt(freq="quarterly")
    self.assertTrue(quarterly.equals(tkr.get_incomestmt(freq="quarterly")))
    self.assertTrue(quarterly.equals(tkr.get_financials(freq="quarterly")))
def test_balance_sheet_alt_names(self):
    """Every alias of the balance sheet must return equal data."""
    tkr = self.ticker

    # Annual, via properties
    annual = tkr.balance_sheet
    self.assertTrue(annual.equals(tkr.balancesheet))

    # Annual, via getters
    annual = tkr.get_balance_sheet()
    self.assertTrue(annual.equals(tkr.get_balancesheet()))

    # Quarterly, via properties
    quarterly = tkr.quarterly_balance_sheet
    self.assertTrue(quarterly.equals(tkr.quarterly_balancesheet))

    # Quarterly, via getters
    quarterly = tkr.get_balance_sheet(freq="quarterly")
    self.assertTrue(quarterly.equals(tkr.get_balancesheet(freq="quarterly")))
def test_cash_flow_alt_names(self):
    """Every alias of the cash-flow statement must return equal data."""
    tkr = self.ticker

    # Annual, via properties
    annual = tkr.cash_flow
    self.assertTrue(annual.equals(tkr.cashflow))

    # Annual, via getters
    annual = tkr.get_cash_flow()
    self.assertTrue(annual.equals(tkr.get_cashflow()))

    # Quarterly, via properties
    quarterly = tkr.quarterly_cash_flow
    self.assertTrue(quarterly.equals(tkr.quarterly_cashflow))

    # Quarterly, via getters
    quarterly = tkr.get_cash_flow(freq="quarterly")
    self.assertTrue(quarterly.equals(tkr.get_cashflow(freq="quarterly")))
def test_sustainability(self):
data = self.ticker.sustainability
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
@@ -563,16 +819,145 @@ class TestTickerMiscFinancials(unittest.TestCase):
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
def test_info(self):
    """The info dict must exist and report the symbol the ticker was built with."""
    info = self.ticker.info
    self.assertIsInstance(info, dict, "data has wrong type")
    self.assertIn("symbol", info.keys(), "Did not find expected key in info dict")
    self.assertEqual("GOOGL", info["symbol"], "Wrong symbol value in info dict")
def test_shares_full(self):
    """get_shares_full() must return a non-empty pandas Series."""
    shares = self.ticker.get_shares_full()
    self.assertIsInstance(shares, pd.Series, "data has wrong type")
    self.assertFalse(shares.empty, "data is empty")
def test_bad_freq_value_raises_exception(self):
    """An unsupported 'freq' argument must raise ValueError."""
    with self.assertRaises(ValueError):
        self.ticker.get_cashflow(freq="badarg")
class TestTickerInfo(unittest.TestCase):
    """Tests for Ticker.info and its consistency with Ticker.fast_info."""

    # Shared caching session, created once for the whole class.
    session = None

    @classmethod
    def setUpClass(cls):
        cls.session = requests_cache.CachedSession(backend='memory')

    @classmethod
    def tearDownClass(cls):
        if cls.session is not None:
            cls.session.close()

    def setUp(self):
        self.symbols = []
        self.symbols += ["ESLT.TA", "BP.L", "GOOGL"]
        self.symbols.append("QCSTIX")  # good for testing, doesn't trade
        self.symbols += ["BTC-USD", "IWO", "VFINX", "^GSPC"]
        self.symbols += ["SOKE.IS", "ADS.DE"]  # detected bugs
        self.tickers = [yf.Ticker(s, session=self.session) for s in self.symbols]

    def tearDown(self):
        # Release the objects that setUp() actually created.
        self.tickers = None

    def test_info(self):
        """The info dict must exist and report the requested symbol."""
        data = self.tickers[0].info
        self.assertIsInstance(data, dict, "data has wrong type")
        self.assertIn("symbol", data.keys(), "Did not find expected key in info dict")
        self.assertEqual(self.symbols[0], data["symbol"], "Wrong symbol value in info dict")

    def test_fast_info(self):
        """Every fast_info value must agree with its counterpart in info.

        fast_info keys are mapped to one or more info keys, then compared for
        each ticker using a relative tolerance (per-key where configured).
        """
        yf.scrapers.quote.PRUNE_INFO = False

        fast_info_keys = set()
        for ticker in self.tickers:
            fast_info_keys.update(set(ticker.fast_info.keys()))
        fast_info_keys = sorted(fast_info_keys)

        # fast_info key -> matching info key, or list of candidate info keys.
        key_rename_map = {
            "currency": "currency",
            "quote_type": "quoteType",
            "timezone": "exchangeTimezoneName",
            "last_price": ["currentPrice", "regularMarketPrice"],
            "open": ["open", "regularMarketOpen"],
            "day_high": ["dayHigh", "regularMarketDayHigh"],
            "day_low": ["dayLow", "regularMarketDayLow"],
            "previous_close": ["previousClose"],
            "regular_market_previous_close": ["regularMarketPreviousClose"],
            "fifty_day_average": "fiftyDayAverage",
            "two_hundred_day_average": "twoHundredDayAverage",
            "year_change": ["52WeekChange", "fiftyTwoWeekChange"],
            "year_high": "fiftyTwoWeekHigh",
            "year_low": "fiftyTwoWeekLow",
            "last_volume": ["volume", "regularMarketVolume"],
            "ten_day_average_volume": ["averageVolume10days", "averageDailyVolume10Day"],
            "three_month_average_volume": "averageVolume",
            "market_cap": "marketCap",
            "shares": "sharesOutstanding",
        }
        # Register the camelCase spelling of each snake_case key too.
        for k in list(key_rename_map.keys()):
            if '_' in k:
                key_rename_map[yf.utils.snake_case_2_camelCase(k)] = key_rename_map[k]

        # Note: share count items in info[] are bad. Sometimes the float > outstanding!
        # So often fast_info["shares"] does not match.
        # Why isn't fast_info["shares"] wrong? Because using it to calculate market cap always correct.
        bad_keys = {"shares"}

        # Loose tolerance for averages, no idea why don't match info[]. Is info wrong?
        custom_tolerances = {
            "year_change": 1.0,
            "ten_day_average_volume": 1e-1,
            "three_month_average_volume": 5e-1,
            "fifty_day_average": 1e-2,
            "two_hundred_day_average": 1e-2,
        }
        for k in list(custom_tolerances.keys()):
            if '_' in k:
                custom_tolerances[yf.utils.snake_case_2_camelCase(k)] = custom_tolerances[k]

        for k in fast_info_keys:
            if k in bad_keys:
                continue

            info_keys = key_rename_map.get(k, k)
            if not isinstance(info_keys, list):
                info_keys = [info_keys]
            rtol = custom_tolerances.get(k, 5e-3)

            for m in info_keys:
                for ticker in self.tickers:
                    if m not in ticker.info:
                        # fast_info key maps to an info key that this ticker lacks
                        continue

                    correct = ticker.info[m]
                    test = ticker.fast_info[k]
                    if k in ["market_cap", "marketCap"] and ticker.fast_info["currency"] in ["GBp", "ILA"]:
                        # Adjust for currency to match Yahoo:
                        test *= 0.01
                    try:
                        if correct is None:
                            self.assertTrue(test is None or (not np.isnan(test)), f"{k}: {test} must be None or real value because correct={correct}")
                        # NOTE(review): an int 'test' against a float 'correct'
                        # falls through to exact equality - confirm intended.
                        elif isinstance(test, float) or isinstance(correct, int):
                            self.assertTrue(np.isclose(test, correct, rtol=rtol), f"{ticker.ticker} {k}: {test} != {correct}")
                        else:
                            self.assertEqual(test, correct, f"{k}: {test} != {correct}")
                    except Exception:
                        if k in ["regularMarketPreviousClose"] and ticker.ticker in ["ADS.DE"]:
                            # Yahoo is wrong, is returning post-market close not regular
                            continue
                        raise
def suite():
suite = unittest.TestSuite()
suite.addTest(TestTicker('Test ticker'))
@@ -580,6 +965,7 @@ def suite():
suite.addTest(TestTickerHolders('Test holders'))
suite.addTest(TestTickerHistory('Test Ticker history'))
suite.addTest(TestTickerMiscFinancials('Test misc financials'))
suite.addTest(TestTickerInfo('Test info & fast_info'))
return suite

View File

@@ -24,6 +24,7 @@ from .ticker import Ticker
from .tickers import Tickers
from .multi import download
from .utils import set_tz_cache_location
from .data import enable_prune_session_cache, disable_prune_session_cache
__version__ = version.version
__author__ = "Ran Aroussi"
@@ -44,3 +45,4 @@ def pdr_override():
__all__ = ['download', 'Ticker', 'Tickers', 'pdr_override', 'set_tz_cache_location']
__all__ += ['enable_prune_session_cache', 'disable_prune_session_cache']

File diff suppressed because it is too large Load Diff

View File

@@ -1,8 +1,21 @@
import functools
from functools import lru_cache
import hashlib
from base64 import b64decode
usePycryptodome = False # slightly faster
# usePycryptodome = True
if usePycryptodome:
from Crypto.Cipher import AES
from Crypto.Util.Padding import unpad
else:
from cryptography.hazmat.primitives import padding
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
import requests as requests
import re
import pandas as _pd
from bs4 import BeautifulSoup
from frozendict import frozendict
@@ -13,6 +26,8 @@ except ImportError:
cache_maxsize = 64
prune_session_cache = True
def lru_cache_freezeargs(func):
"""
@@ -35,9 +50,134 @@ def lru_cache_freezeargs(func):
return wrapped
def _extract_extra_keys_from_stores(data):
    """Return the distinct values of the extra top-level entries in *data*.

    Every key except "context" and "plugins" counts as extra. When several
    keys hold an equal value only the first occurrence is kept, and the
    original key order is preserved.

    Args:
        data: mapping whose extra values are hashable.

    Returns:
        list of the unique extra values, in first-seen order.

    Raises:
        TypeError: if an extra value is unhashable.
    """
    extra_values = (data[k] for k in data.keys() if k not in ("context", "plugins"))
    # dict.fromkeys() de-duplicates while keeping insertion order.
    return list(dict.fromkeys(extra_values))
def decrypt_cryptojs_aes_stores(data, keys=None):
    """Decrypt the AES-encrypted 'stores' payload of a Yahoo page and parse it as JSON.

    The payload at ``data['context']['dispatcher']['stores']`` is base64 text
    whose decoded form starts with ``b"Salted__"``, followed by an 8-byte
    salt and then the ciphertext. The password is derived from ``data["_cs"]``
    and ``data["_cr"]`` when both are present; otherwise each entry of *keys*
    is tried in order until one decrypts successfully.

    Args:
        data (dict): parsed page data containing the encrypted stores.
        keys (list, optional): candidate passwords to try.

    Returns:
        The JSON-decoded stores.

    Raises:
        TypeError: if *keys* is given but is not a list.
        AssertionError: if the decoded payload lacks the "Salted__" header.
        Exception: if no password decrypts the payload.
    """
    encrypted_stores = data['context']['dispatcher']['stores']

    password = None
    if keys is not None:
        if not isinstance(keys, list):
            raise TypeError("'keys' must be list")
        candidate_passwords = keys
    else:
        candidate_passwords = []

    if "_cs" in data and "_cr" in data:
        _cs = data["_cs"]
        _cr = data["_cr"]
        _cr = b"".join(int.to_bytes(i, length=4, byteorder="big", signed=True) for i in json.loads(_cr)["words"])
        password = hashlib.pbkdf2_hmac("sha1", _cs.encode("utf8"), _cr, 1, dklen=32).hex()

    encrypted_stores = b64decode(encrypted_stores)
    assert encrypted_stores[0:8] == b"Salted__"
    salt = encrypted_stores[8:16]
    encrypted_stores = encrypted_stores[16:]

    def _EVPKDF(password, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="md5") -> tuple:
        """OpenSSL EVP Key Derivation Function
        Args:
            password (Union[str, bytes, bytearray]): Password to generate key from.
            salt (Union[bytes, bytearray]): Salt to use.
            keySize (int, optional): Output key length in bytes. Defaults to 32.
            ivSize (int, optional): Output Initialization Vector (IV) length in bytes. Defaults to 16.
            iterations (int, optional): Number of iterations to perform. Defaults to 1.
            hashAlgorithm (str, optional): Hash algorithm to use for the KDF. Defaults to 'md5'.
        Returns:
            key, iv: Derived key and Initialization Vector (IV) bytes.
        Taken from: https://gist.github.com/rafiibrahim8/0cd0f8c46896cafef6486cb1a50a16d3
        OpenSSL original code: https://github.com/openssl/openssl/blob/master/crypto/evp/evp_key.c#L78
        """

        assert iterations > 0, "Iterations can not be less than 1."

        if isinstance(password, str):
            password = password.encode("utf-8")

        final_length = keySize + ivSize
        key_iv = b""
        block = None

        while len(key_iv) < final_length:
            hasher = hashlib.new(hashAlgorithm)
            if block:
                hasher.update(block)
            hasher.update(password)
            hasher.update(salt)
            block = hasher.digest()
            for _ in range(1, iterations):
                block = hashlib.new(hashAlgorithm, block).digest()
            key_iv += block

        key, iv = key_iv[:keySize], key_iv[keySize:final_length]
        return key, iv

    def _decrypt(ciphertext, key, iv):
        """AES-CBC decrypt *ciphertext*, strip PKCS7 padding, return UTF-8 text."""
        if usePycryptodome:
            cipher = AES.new(key, AES.MODE_CBC, iv=iv)
            plaintext = cipher.decrypt(ciphertext)
            plaintext = unpad(plaintext, 16, style="pkcs7")
        else:
            cipher = Cipher(algorithms.AES(key), modes.CBC(iv))
            decryptor = cipher.decryptor()
            plaintext = decryptor.update(ciphertext) + decryptor.finalize()
            unpadder = padding.PKCS7(128).unpadder()
            plaintext = unpadder.update(plaintext) + unpadder.finalize()
        return plaintext.decode("utf-8")

    if password is not None:
        try:
            key, iv = _EVPKDF(password, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="md5")
        except Exception as e:
            raise Exception("yfinance failed to decrypt Yahoo data response") from e
        plaintext = _decrypt(encrypted_stores, key, iv)
    else:
        for password in candidate_passwords:
            try:
                key, iv = _EVPKDF(password, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="md5")
                plaintext = _decrypt(encrypted_stores, key, iv)
            except Exception:
                # Wrong candidate: bad padding or undecodable bytes. Try the
                # next one. KeyboardInterrupt/SystemExit still propagate.
                continue
            break
        else:
            # Loop finished without a successful decrypt (or had no candidates).
            raise Exception("yfinance failed to decrypt Yahoo data response")

    decoded_stores = json.loads(plaintext)
    return decoded_stores
_SCRAPE_URL_ = 'https://finance.yahoo.com/quote'
def enable_prune_session_cache():
    """Switch the module-level 'prune_session_cache' flag on."""
    globals()["prune_session_cache"] = True
def disable_prune_session_cache():
    """Switch the module-level 'prune_session_cache' flag off."""
    globals()["prune_session_cache"] = False
class TickerData:
"""
Have one place to retrieve data from Yahoo API in order to ease caching and speed up operations
@@ -49,6 +189,18 @@ class TickerData:
self.ticker = ticker
self._session = session or requests
def _check_requests_cache_hook(self):
    """Sync the session's response hooks with the 'prune_session_cache' flag.

    Does nothing when the session has no 'cache' attribute (not a caching
    session). With pruning enabled, registers self._check_Yahoo_response as
    a response hook if it is not registered yet. With pruning disabled,
    resets the session's response hooks to an empty list.
    """
    try:
        self._session.cache
    except AttributeError:
        # Not a caching session
        return

    if not prune_session_cache:
        # NOTE(review): this clears every response hook on the session, not
        # only the one registered below - confirm that is intended.
        self._session.hooks["response"] = []
        return

    response_hooks = self._session.hooks["response"]
    if self._check_Yahoo_response not in response_hooks:
        response_hooks.append(self._check_Yahoo_response)
def get(self, url, user_agent_headers=None, params=None, proxy=None, timeout=30):
proxy = self._get_proxy(proxy)
response = self._session.get(
@@ -72,6 +224,198 @@ class TickerData:
proxy = {"https": proxy}
return proxy
def _get_decryption_keys_from_yahoo_js(self, soup):
result = None
key_count = 4
re_script = soup.find("script", string=re.compile("root.App.main")).text
re_data = json.loads(re.search("root.App.main\s+=\s+(\{.*\})", re_script).group(1))
re_data.pop("context", None)
key_list = list(re_data.keys())
if re_data.get("plugins"): # 1) attempt to get last 4 keys after plugins
ind = key_list.index("plugins")
if len(key_list) > ind+1:
sub_keys = key_list[ind+1:]
if len(sub_keys) == key_count:
re_obj = {}
missing_val = False
for k in sub_keys:
if not re_data.get(k):
missing_val = True
break
re_obj.update({k: re_data.get(k)})
if not missing_val:
result = re_obj
if not result is None:
return [''.join(result.values())]
re_keys = [] # 2) attempt scan main.js file approach to get keys
prefix = "https://s.yimg.com/uc/finance/dd-site/js/main."
tags = [tag['src'] for tag in soup.find_all('script') if prefix in tag.get('src', '')]
for t in tags:
response_js = self.cache_get(t)
#
if response_js.status_code != 200:
time.sleep(random.randrange(10, 20))
response_js.close()
else:
r_data = response_js.content.decode("utf8")
re_list = [
x.group() for x in re.finditer(r"context.dispatcher.stores=JSON.parse((?:.*?\r?\n?)*)toString", r_data)
]
for rl in re_list:
re_sublist = [x.group() for x in re.finditer(r"t\[\"((?:.*?\r?\n?)*)\"\]", rl)]
if len(re_sublist) == key_count:
re_keys = [sl.replace('t["', '').replace('"]', '') for sl in re_sublist]
break
response_js.close()
if len(re_keys) == key_count:
break
re_obj = {}
missing_val = False
for k in re_keys:
if not re_data.get(k):
missing_val = True
break
re_obj.update({k: re_data.get(k)})
if not missing_val:
return [''.join(re_obj.values())]
return []
def _gather_keys_from_response(self, response, data=None):
    """Collect candidate decryption keys for a Yahoo page response.

    Keys are first extracted from the page's JavaScript. If that yields
    nothing, a warning is printed and backup sources are used: keys derived
    from the page's ``root.App.main`` JSON, followed by the key list
    downloaded from the yfinance GitHub repository.

    :param response: HTTP response for the Yahoo page (``content`` and
        ``text`` are read).
    :param data: optional, already-parsed ``root.App.main`` JSON. When
        omitted it is parsed from ``response.text``. (The backup extraction
        used to reference an undefined ``data`` name, so it always failed
        silently.)
    :return: list of candidate key strings; may be empty.
    """
    # Gather decryption keys:
    soup = BeautifulSoup(response.content, "html.parser")
    keys = self._get_decryption_keys_from_yahoo_js(soup)
    if keys:
        return keys

    msg = "No decryption keys could be extracted from JS file."
    if "requests_cache" in str(type(response)):
        msg += " Try flushing your 'requests_cache', probably parsing old JS."
    print("WARNING: " + msg + " Falling back to backup decrypt methods.")

    keys = []
    try:
        if data is None:
            json_str = response.text.split('root.App.main =')[1].split(
                '(this)')[0].split(';\n}')[0].strip()
            data = json.loads(json_str)
        # NOTE(review): _extract_extra_keys_from_stores is defined outside this
        # view; assumed to accept the parsed JSON and return a sequence of strings.
        extra_keys = _extract_extra_keys_from_stores(data)
        if extra_keys:
            keys = [''.join(extra_keys[-4:])]
    except Exception:
        # Best effort only: the downloaded key list below is still tried.
        pass

    keys_url = "https://github.com/ranaroussi/yfinance/raw/main/yfinance/scrapers/yahoo-keys.txt"
    response_gh = self.cache_get(keys_url)
    keys += response_gh.text.splitlines()
    return keys
def _check_Yahoo_response(self, r, *args, **kwargs):
# Parse the data returned by Yahoo to determine if corrupt/incomplete.
# If bad, set 'status_code' to 204 "No content" , that stops it
# entering a requests_cache.
# Because this involves parsing, the output is added to response object
# with prefix "yf_" and reused elsewhere.
#
# Registered on the session's "response" hooks by _check_requests_cache_hook().
# Returns the response 'r' when it was modified here (status code changed
# or a "yf_" attribute added); otherwise returns None.
# Extra positional/keyword arguments are accepted but not used.
if not "yahoo.com/" in r.url:
# Only check Yahoo responses
return
attrs = dir(r)
# True only if the response has a truthy 'from_cache' attribute
r_from_cache = "from_cache" in attrs and r.from_cache
if "yf_data" in attrs or "yf_json" in attrs or "yf_html_pd" in attrs:
# Have already parsed this response, successfully
return
if "Will be right back" in r.text:
# Simple check, no parsing needed
r.status_code = 204
return r
# parse_failed: the payload could not be parsed / is unusable.
# r_modified: something was attached to, or changed on, 'r'.
parse_failed = False
r_modified = False
if "/ws/fundamentals-timeseries" in r.url:
# Timeseries
try:
data = r.json()
r.yf_json = data
r_modified = True
# Lookup only to verify the expected keys exist
data["timeseries"]["result"]
except:
parse_failed = True
elif "/finance/chart/" in r.url:
# Prices
try:
data = r.json()
r.yf_json = data
r_modified = True
if data["chart"]["error"] is not None:
parse_failed = True
except Exception:
parse_failed = True
elif "/finance/options/" in r.url:
# Options
if not "expirationDates" in r.text:
# Parse will fail
parse_failed = True
elif "/finance/search?" in r.url:
# News, can't be bothered to check
return
elif "/calendar/earnings?" in r.url:
# Earnings calendar: parsed HTML tables are stored on 'r.yf_html_pd'
try:
dfs = _pd.read_html(r.text)
except ValueError as e:
if "No tables found" in str(e):
# Maybe this ticker doesn't have any earnings dates
pass
else:
parse_failed = True
except Exception as e:
parse_failed = True
else:
r.yf_html_pd = dfs
r_modified = True
elif "root.App.main" in r.text:
# JSON data stores
try:
json_str = r.text.split('root.App.main =')[1].split(
'(this)')[0].split(';\n}')[0].strip()
except IndexError:
parse_failed = True
if not parse_failed:
data = json.loads(json_str)
keys = self._gather_keys_from_response(r)
# Decrypt!
stores = decrypt_cryptojs_aes_stores(data, keys)
if stores is None:
# Maybe Yahoo returned old format, not encrypted
if "context" in data and "dispatcher" in data["context"]:
stores = data['context']['dispatcher']['stores']
if stores is None:
# raise Exception(f"{self.ticker}: Failed to extract data stores from web request")
print(f"{self.ticker}: Failed to decrypt/extract data stores from web request")
parse_failed = True
if "yf_data" not in attrs:
# if not parse_failed and "yf_data" not in attrs:
r.yf_data = stores
r_modified = True
# Stores without "QuoteSummaryStore" are treated as a failed parse
if stores is not None and "QuoteSummaryStore" not in stores:
parse_failed = True
else:
# URL/content type not recognised: leave the response untouched
return
if parse_failed:
# Only responses that did not come from the cache get their status changed
if not r_from_cache:
r.status_code = 204 # No content
r_modified = True
if r_modified:
return r
@lru_cache_freezeargs
@lru_cache(maxsize=cache_maxsize)
def get_json_data_stores(self, sub_page: str = None, proxy=None) -> dict:
@@ -83,16 +427,46 @@ class TickerData:
else:
ticker_url = "{}/{}".format(_SCRAPE_URL_, self.ticker)
html = self.get(url=ticker_url, proxy=proxy).text
# Ensure hook ready to intercept get responses
self._check_requests_cache_hook()
# The actual json-data for stores is in a javascript assignment in the webpage
json_str = html.split('root.App.main =')[1].split(
'(this)')[0].split(';\n}')[0].strip()
data = json.loads(json_str)['context']['dispatcher']['stores']
response = self.get(url=ticker_url, proxy=proxy)
if "yf_data" in dir(response):
# _check_requests_cache_hook() already successfully extracted & decrypted
stores = response.yf_data
else:
# Extract JSON and decrypt
html = response.text
# The actual json-data for stores is in a javascript assignment in the webpage
try:
json_str = html.split('root.App.main =')[1].split(
'(this)')[0].split(';\n}')[0].strip()
except IndexError:
# Problem with data so clear from session cache
# self.session_cache_prune_url(ticker_url)
# Then exit
return {}
data = json.loads(json_str)
keys = self._gather_keys_from_response(response)
# Decrypt!
stores = decrypt_cryptojs_aes_stores(data, keys)
if stores is None:
# Maybe Yahoo returned old format, not encrypted
if "context" in data and "dispatcher" in data["context"]:
stores = data['context']['dispatcher']['stores']
if stores is None:
raise Exception(f"{self.ticker}: Failed to extract data stores from web request")
# return data
new_data = json.dumps(data).replace('{}', 'null')
new_data = json.dumps(stores).replace('{}', 'null')
new_data = re.sub(
r'{[\'|\"]raw[\'|\"]:(.*?),(.*?)}', r'\1', new_data)
return json.loads(new_data)
json_data = json.loads(new_data)
return json_data

View File

@@ -1,6 +1,6 @@
class YFianceException(Exception):
class YFinanceException(Exception):
pass
class YFianceDataException(YFianceException):
class YFinanceDataException(YFinanceException):
pass

View File

@@ -29,7 +29,7 @@ from . import Ticker, utils
from . import shared
def download(tickers, start=None, end=None, actions=False, threads=True, ignore_tz=True,
def download(tickers, start=None, end=None, actions=False, threads=True, ignore_tz=None,
group_by='column', auto_adjust=False, back_adjust=False, repair=False, keepna=False,
progress=True, period="max", show_errors=True, interval="1d", prepost=False,
proxy=None, rounding=False, timeout=10):
@@ -68,7 +68,7 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
How many threads to use for mass downloading. Default is True
ignore_tz: bool
When combining from different timezones, ignore that part of datetime.
Default is True
Default depends on interval. Intraday = False. Day+ = True.
proxy: str
Optional. Proxy server URL scheme. Default is None
rounding: bool
@@ -80,6 +80,14 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
seconds. (Can also be a fraction of a second e.g. 0.01)
"""
if ignore_tz is None:
    # Set default value depending on interval: intraday -> False, day+ -> True.
    # Intraday intervals end with a minute/hour unit ("1m", "15m", "90m", "1h").
    # Test the suffix rather than interval[1:], which only recognised
    # single-digit intervals and treated e.g. "15m" or "60m" as day+.
    # "1mo"/"3mo" end with "o", so they remain day+.
    ignore_tz = not interval.endswith(('m', 'h'))
# create ticker list
tickers = tickers if isinstance(
tickers, (list, set, tuple)) else tickers.replace(',', ' ').split()

View File

@@ -6,7 +6,7 @@ import numpy as np
from yfinance import utils
from yfinance.data import TickerData
from yfinance.exceptions import YFianceDataException, YFianceException
from yfinance.exceptions import YFinanceDataException, YFinanceException
class Fundamentals:
@@ -22,10 +22,10 @@ class Fundamentals:
self._financials_data = None
self._fin_data_quote = None
self._basics_already_scraped = False
self._financials = Fiancials(data)
self._financials = Financials(data)
@property
def financials(self) -> "Fiancials":
def financials(self) -> "Financials":
return self._financials
@property
@@ -97,7 +97,7 @@ class Fundamentals:
pass
class Fiancials:
class Financials:
def __init__(self, data: TickerData):
self._data = data
self._income_time_series = {}
@@ -143,8 +143,8 @@ class Fiancials:
if statement is not None:
return statement
except YFianceException as e:
print("Failed to create financials table for {} reason: {}".format(name, repr(e)))
except YFinanceException as e:
print(f"- {self._data.ticker}: Failed to create {name} financials table for reason: {repr(e)}")
return pd.DataFrame()
def _create_financials_table(self, name, timescale, proxy):
@@ -153,14 +153,8 @@ class Fiancials:
name = "financials"
keys = self._get_datastore_keys(name, proxy)
try:
# Developers note: TTM and template stuff allows for reproducing the nested structure
# visible on Yahoo website. But more work needed to make it user-friendly! Ideally
# return a tree data structure instead of Pandas MultiIndex
# So until this is implemented, just return simple tables
return self.get_financials_time_series(timescale, keys, proxy)
except Exception as e:
pass
@@ -183,10 +177,10 @@ class Fiancials:
try:
keys = _finditem1("key", data_stores['FinancialTemplateStore'])
except KeyError as e:
raise YFianceDataException("Parsing FinancialTemplateStore failed, reason: {}".format(repr(e)))
raise YFinanceDataException("Parsing FinancialTemplateStore failed, reason: {}".format(repr(e)))
if not keys:
raise YFianceDataException("No keys in FinancialTemplateStore")
raise YFinanceDataException("No keys in FinancialTemplateStore")
return keys
def get_financials_time_series(self, timescale, keys: list, proxy=None) -> pd.DataFrame:
@@ -201,7 +195,7 @@ class Fiancials:
url = ts_url_base + "&type=" + ",".join([timescale + k for k in keys])
# Yahoo returns maximum 4 years or 5 quarters, regardless of start_dt:
start_dt = datetime.datetime(2016, 12, 31)
end = (datetime.datetime.now() + datetime.timedelta(days=366))
end = pd.Timestamp.utcnow().ceil("D")
url += "&period1={}&period2={}".format(int(start_dt.timestamp()), int(end.timestamp()))
# Step 3: fetch and reshape data
@@ -272,8 +266,8 @@ class Fiancials:
if statement is not None:
return statement
except YFianceException as e:
print("Failed to create financials table for {} reason: {}".format(name, repr(e)))
except YFinanceException as e:
print(f"- {self._data.ticker}: Failed to create financials table for {name} reason: {repr(e)}")
return pd.DataFrame()
def _create_financials_table_old(self, name, timescale, proxy):
@@ -281,7 +275,7 @@ class Fiancials:
# Fetch raw data
if not "QuoteSummaryStore" in data_stores:
return pd.DataFrame()
raise YFinanceDataException(f"Yahoo not returning legacy financials data")
data = data_stores["QuoteSummaryStore"]
if name == "cash-flow":
@@ -296,12 +290,14 @@ class Fiancials:
key1 += "History"
if timescale == "quarterly":
key1 += "Quarterly"
data = data.get(key1)[key2]
if key1 not in data or data[key1] is None or key2 not in data[key1]:
raise YFinanceDataException(f"Yahoo not returning legacy {name} financials data")
data = data[key1][key2]
# Tabulate
df = pd.DataFrame(data)
if len(df) == 0:
return pd.DataFrame()
raise YFinanceDataException(f"Yahoo not returning legacy {name} financials data")
df = df.drop(columns=['maxAge'])
for col in df.columns:
df[col] = df[col].replace('-', np.nan)

View File

@@ -7,6 +7,73 @@ from yfinance import utils
from yfinance.data import TickerData
# Keys removed from the info dict, grouped by the kind of data they held.
# InfoDictWrapper uses the individual groups to pick a how-to-migrate message.
info_retired_keys_price = (
    {"currentPrice", "dayHigh", "dayLow", "open", "previousClose", "volume", "volume24Hr"}
    | {"regularMarket" + suffix
       for suffix in ("DayHigh", "DayLow", "Open", "PreviousClose", "Price", "Volume")}
    | {"fiftyTwoWeekLow", "fiftyTwoWeekHigh", "fiftyTwoWeekChange", "52WeekChange",
       "fiftyDayAverage", "twoHundredDayAverage"}
    | {"averageDailyVolume10Day", "averageVolume10days", "averageVolume"}
)
info_retired_keys_exchange = {
    "currency", "exchange", "exchangeTimezoneName", "exchangeTimezoneShortName", "quoteType",
}
info_retired_keys_marketCap = {"marketCap"}
info_retired_keys_symbol = {"symbol"}
# Union of every retired key
info_retired_keys = set().union(
    info_retired_keys_price,
    info_retired_keys_exchange,
    info_retired_keys_marketCap,
    info_retired_keys_symbol,
)
PRUNE_INFO = True
# PRUNE_INFO = False
from collections.abc import MutableMapping
class InfoDictWrapper(MutableMapping):
    """Mapping facade over the info dict.

    Reads of retired keys print a message explaining where the data can be
    found instead and return None; every other operation is forwarded to
    the wrapped dict, available as ``self.info``.
    """

    def __init__(self, info):
        self.info = info

    def _keytransform(self, k):
        # Identity transform, applied to keys before item get/set/delete.
        return k

    def __len__(self):
        return len(self.info)

    def __iter__(self):
        return iter(self.info)

    def __contains__(self, k):
        return k in self.info.keys()

    def keys(self):
        return self.info.keys()

    def __repr__(self):
        return self.info.__repr__()

    def __str__(self):
        return self.info.__str__()

    def __getitem__(self, k):
        # Retired keys: print the migration hint and return None.
        if k in info_retired_keys_price:
            notice = f"Price data removed from info (key='{k}'). Use Ticker.fast_info or history() instead"
        elif k in info_retired_keys_exchange:
            notice = f"Exchange data removed from info (key='{k}'). Use Ticker.fast_info or Ticker.get_history_metadata() instead"
        elif k in info_retired_keys_marketCap:
            notice = f"Market cap removed from info (key='{k}'). Use Ticker.fast_info instead"
        elif k in info_retired_keys_symbol:
            notice = f"Symbol removed from info (key='{k}'). You know this already"
        else:
            return self.info[self._keytransform(k)]
        print(notice)
        return None

    def __setitem__(self, k, value):
        self.info[self._keytransform(k)] = value

    def __delitem__(self, k):
        del self.info[self._keytransform(k)]
class Quote:
def __init__(self, data: TickerData, proxy=None):
@@ -14,6 +81,7 @@ class Quote:
self.proxy = proxy
self._info = None
self._retired_info = None
self._sustainability = None
self._recommendations = None
self._calendar = None
@@ -130,6 +198,19 @@ class Quote:
except Exception:
pass
# Delete redundant info[] keys, because values can be accessed faster
# elsewhere - e.g. price keys. The hope is that this reduces the Yahoo spam effect.
# But record the dropped keys, because in rare cases they are needed.
self._retired_info = {}
for k in info_retired_keys:
if k in self._info:
self._retired_info[k] = self._info[k]
if PRUNE_INFO:
del self._info[k]
if PRUNE_INFO:
# InfoDictWrapper will explain how to access above data elsewhere
self._info = InfoDictWrapper(self._info)
# events
try:
cal = pd.DataFrame(quote_summary_store['calendarEvents']['earnings'])
@@ -194,17 +275,22 @@ class Quote:
for k in keys:
url += "&type=" + k
# Request 6 months of data
url += "&period1={}".format(
int((datetime.datetime.now() - datetime.timedelta(days=365 // 2)).timestamp()))
url += "&period2={}".format(int((datetime.datetime.now() + datetime.timedelta(days=1)).timestamp()))
start = pd.Timestamp.utcnow().floor("D") - datetime.timedelta(days=365 // 2)
start = int(start.timestamp())
end = pd.Timestamp.utcnow().ceil("D")
end = int(end.timestamp())
url += f"&period1={start}&period2={end}"
json_str = self._data.cache_get(url=url, proxy=proxy).text
json_data = json.loads(json_str)
key_stats = json_data["timeseries"]["result"][0]
if k not in key_stats:
# Yahoo website prints N/A, indicates Yahoo lacks necessary data to calculate
try:
key_stats = json_data["timeseries"]["result"][0]
if k not in key_stats:
# Yahoo website prints N/A, indicates Yahoo lacks necessary data to calculate
v = None
else:
# Select most recent (last) raw value in list:
v = key_stats[k][-1]["reportedValue"]["raw"]
except Exception:
v = None
else:
# Select most recent (last) raw value in list:
v = key_stats[k][-1]["reportedValue"]["raw"]
self._info[k] = v

View File

@@ -0,0 +1,5 @@
daf93e37cbf219cd4c1f3f74ec4551265ec5565b99e8c9322dccd6872941cf13c818cbb88cba6f530e643b4e2329b17ec7161f4502ce6a02bb0dbbe5fc0d0474
ad4d90b3c9f2e1d156ef98eadfa0ff93e4042f6960e54aa2a13f06f528e6b50ba4265a26a1fd5b9cd3db0d268a9c34e1d080592424309429a58bce4adc893c87
e9a8ab8e5620b712ebc2fb4f33d5c8b9c80c0d07e8c371911c785cf674789f1747d76a909510158a7b7419e86857f2d7abbd777813ff64840e4cbc514d12bcae
6ae2523aeafa283dad746556540145bf603f44edbf37ad404d3766a8420bb5eb1d3738f52a227b88283cca9cae44060d5f0bba84b6a495082589f5fe7acbdc9e
3365117c2a368ffa5df7313a4a84988f73926a86358e8eea9497c5ff799ce27d104b68e5f2fbffa6f8f92c1fef41765a7066fa6bcf050810a9c4c7872fd3ebf0

View File

@@ -161,6 +161,22 @@ class Ticker(TickerBase):
def quarterly_income_stmt(self) -> _pd.DataFrame:
return self.get_income_stmt(pretty=True, freq='quarterly')
# Alias: returns the same value as the `income_stmt` property.
@property
def incomestmt(self) -> _pd.DataFrame:
return self.income_stmt
# Alias: returns the same value as the `quarterly_income_stmt` property.
@property
def quarterly_incomestmt(self) -> _pd.DataFrame:
return self.quarterly_income_stmt
# Alias: returns the same value as the `income_stmt` property.
@property
def financials(self) -> _pd.DataFrame:
return self.income_stmt
# Alias: returns the same value as the `quarterly_income_stmt` property.
@property
def quarterly_financials(self) -> _pd.DataFrame:
return self.quarterly_income_stmt
@property
def balance_sheet(self) -> _pd.DataFrame:
return self.get_balance_sheet(pretty=True)
@@ -177,13 +193,21 @@ class Ticker(TickerBase):
def quarterly_balancesheet(self) -> _pd.DataFrame:
return self.quarterly_balance_sheet
# Yearly cash flow: the result of get_cash_flow(pretty=True, freq="yearly").
@property
def cash_flow(self) -> _pd.DataFrame:
return self.get_cash_flow(pretty=True, freq="yearly")
# Quarterly cash flow: the result of get_cash_flow(pretty=True, freq='quarterly').
@property
def quarterly_cash_flow(self) -> _pd.DataFrame:
return self.get_cash_flow(pretty=True, freq='quarterly')
@property
def cashflow(self) -> _pd.DataFrame:
return self.get_cashflow(pretty=True, freq="yearly")
return self.cash_flow
@property
def quarterly_cashflow(self) -> _pd.DataFrame:
return self.get_cashflow(pretty=True, freq='quarterly')
return self.quarterly_cash_flow
@property
def recommendations_summary(self):

View File

@@ -34,12 +34,8 @@ class Tickers:
tickers = tickers if isinstance(
tickers, list) else tickers.replace(',', ' ').split()
self.symbols = [ticker.upper() for ticker in tickers]
ticker_objects = {}
self.tickers = {ticker:Ticker(ticker, session=session) for ticker in self.symbols}
for ticker in self.symbols:
ticker_objects[ticker] = Ticker(ticker, session=session)
self.tickers = ticker_objects
# self.tickers = _namedtuple(
# "Tickers", ticker_objects.keys(), rename=True
# )(*ticker_objects.values())

View File

@@ -49,6 +49,18 @@ user_agent_headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
# From https://stackoverflow.com/a/59128615
from types import FunctionType
from inspect import getmembers
def attributes(obj):
    """Return a dict of obj's public, non-function attributes.

    Names starting with an underscore are skipped, as are names that refer
    to plain functions defined on ``type(obj)``.
    """
    function_names = set()
    for member_name, member in getmembers(type(obj)):
        if isinstance(member, FunctionType):
            function_names.add(member_name)

    public = {}
    for name in dir(obj):
        if name[0] == '_' or name in function_names:
            continue
        if hasattr(obj, name):
            public[name] = getattr(obj, name)
    return public
def is_isin(string):
    """Return True if ``string`` has the shape of an ISIN.

    The accepted shape is two upper-case letters, nine upper-case letters
    or digits, then one digit. The whole string must match: ``fullmatch``
    is used because ``match`` with a ``$`` anchor also accepted a trailing
    newline.
    """
    return bool(_re.fullmatch("([A-Z]{2})([A-Z0-9]{9})([0-9]{1})", string))
@@ -288,6 +300,11 @@ def camel2title(strings: List[str], sep: str = ' ', acronyms: Optional[List[str]
return strings
def snake_case_2_camelCase(s):
    """Convert a snake_case string to camelCase.

    The text before the first underscore is kept unchanged; each later
    underscore-separated part is passed through ``str.title()`` and appended.
    """
    head, *rest = s.split('_')
    return head + ''.join(part.title() for part in rest)
def _parse_user_dt(dt, exchange_tz):
if isinstance(dt, int):
# Should already be epoch, test with conversion:
@@ -307,7 +324,11 @@ def _parse_user_dt(dt, exchange_tz):
def _interval_to_timedelta(interval):
if interval == "1mo":
return _dateutil.relativedelta(months=1)
return _dateutil.relativedelta.relativedelta(months=1)
elif interval == "3mo":
return _dateutil.relativedelta.relativedelta(months=3)
elif interval == "1y":
return _dateutil.relativedelta.relativedelta(years=1)
elif interval == "1wk":
return _pd.Timedelta(days=7, unit='d')
else:
@@ -427,6 +448,35 @@ def set_df_tz(df, interval, tz):
return df
def fix_Yahoo_returning_prepost_unrequested(quotes, interval, metadata):
    """Drop price rows that fall outside regular trading hours.

    Sometimes Yahoo returns post-market data despite not requesting it;
    this normally happens on half-day early closes. Sometimes it returns
    pre-market data despite not requesting it, e.g. for some London tickers.

    Each row of ``quotes`` is matched by calendar date with the row of
    ``metadata["tradingPeriods"]`` for that date. Rows whose timestamp is
    before that day's "start" or at/after its "end" are removed.
    "end" = end of regular trading hours (including any auction).

    :param quotes: price table indexed by timestamp.
    :param interval: unused; kept for interface compatibility.
    :param metadata: dict whose "tradingPeriods" entry is a table indexed
        by date with "start" and "end" columns.
    :return: a new table without the out-of-hours rows. Neither ``quotes``
        nor ``metadata["tradingPeriods"]`` is modified (the helper "_date"
        column used to be written into both input tables).
    """
    tps_df = metadata["tradingPeriods"]
    idx = quotes.index.copy()

    # assign() returns new frames, so the temporary "_date" join key never
    # touches the caller's objects.
    merged = quotes.assign(_date=quotes.index.date).merge(
        tps_df.assign(_date=tps_df.index.date), how="left", validate="many_to_one")
    # merge() discards the index; restore the original timestamps
    merged.index = idx

    f_drop = (merged.index >= merged["end"]) | (merged.index < merged["start"])
    if f_drop.any():
        merged = merged[~f_drop]

    return merged.drop(["_date", "start", "end"], axis=1)
def fix_Yahoo_returning_live_separate(quotes, interval, tz_exchange):
# Yahoo bug fix. If market is open today then Yahoo normally returns
# todays data as a separate row from rest-of week/month interval in above row.
@@ -607,7 +657,7 @@ def safe_merge_dfs(df_main, df_sub, interval):
if interval.endswith('m') or interval.endswith('h') or interval == "1d":
# Update: is possible with daily data when dividend very recent
f_missing = ~df_sub.index.isin(df.index)
df_sub_missing = df_sub[f_missing]
df_sub_missing = df_sub[f_missing].copy()
keys = {"Adj Open", "Open", "Adj High", "High", "Adj Low", "Low", "Adj Close",
"Close"}.intersection(df.columns)
df_sub_missing[list(keys)] = _np.nan
@@ -640,6 +690,71 @@ def is_valid_timezone(tz: str) -> bool:
return True
def format_history_metadata(md):
    """Convert raw epoch values in history metadata to localized timestamps.

    ``md`` is modified in place and also returned:

    * "firstTradeDate" and "regularMarketTime" (when present) are converted
      from epoch seconds to timestamps in the exchange timezone;
    * in "currentTradingPeriod", the "start"/"end" of each of "regular",
      "pre" and "post" (when present) are converted the same way, and the
      "gmtoffset"/"timezone" entries are removed if they exist (a missing
      entry used to raise KeyError);
    * "tradingPeriods" is removed when it equals ``{"pre": [], "post": []}``;
      otherwise it is replaced by a table indexed by date ("Date") with
      "start"/"end" columns, plus "pre_*"/"post_*" columns when the source
      is a dict with pre- and post-market periods.

    :param md: metadata dict. Non-dict values and empty dicts are returned
        unchanged.
    :raises KeyError: if a non-empty dict lacks "exchangeTimezoneName".
    :raises Exception: if "tradingPeriods" is neither a list nor a dict.
    """
    if not isinstance(md, dict) or len(md) == 0:
        return md

    tz = md["exchangeTimezoneName"]

    def _to_exchange_time(epoch):
        # Epoch seconds -> timestamp in the exchange timezone.
        return _pd.to_datetime(epoch, unit='s', utc=True).tz_convert(tz)

    for k in ("firstTradeDate", "regularMarketTime"):
        if k in md:
            md[k] = _to_exchange_time(md[k])

    if "currentTradingPeriod" in md:
        current = md["currentTradingPeriod"]
        for m in ("regular", "pre", "post"):
            if m in current:
                period = current[m]
                for t in ("start", "end"):
                    period[t] = _to_exchange_time(period[t])
                period.pop("gmtoffset", None)
                period.pop("timezone", None)

    if "tradingPeriods" in md:
        if md["tradingPeriods"] == {"pre": [], "post": []}:
            del md["tradingPeriods"]

    if "tradingPeriods" in md:
        tps = md["tradingPeriods"]
        if isinstance(tps, list):
            # Only regular times
            regs_dict = [day[0] for day in tps]
            pres_dict = None
            posts_dict = None
        elif isinstance(tps, dict):
            # Includes pre- and post-market
            pres_dict = [day[0] for day in tps["pre"]]
            posts_dict = [day[0] for day in tps["post"]]
            regs_dict = [day[0] for day in tps["regular"]]
        else:
            raise Exception(f"Unexpected type for 'tradingPeriods': {type(tps).__name__}")

        def _dict_to_table(d):
            # One row per period; index is the period's local date at midnight.
            df = _pd.DataFrame.from_dict(d).drop(["timezone", "gmtoffset"], axis=1, errors="ignore")
            df["end"] = _pd.to_datetime(df["end"], unit='s', utc=True).dt.tz_convert(tz)
            df["start"] = _pd.to_datetime(df["start"], unit='s', utc=True).dt.tz_convert(tz)
            df.index = _pd.to_datetime(df["start"].dt.date)
            df.index = df.index.tz_localize(tz)
            return df

        df = _dict_to_table(regs_dict)
        df_cols = ["start", "end"]
        if pres_dict is not None:
            pre_df = _dict_to_table(pres_dict)
            df = df.merge(pre_df.rename(columns={"start": "pre_start", "end": "pre_end"}), left_index=True, right_index=True)
            df_cols = ["pre_start", "pre_end"] + df_cols
        if posts_dict is not None:
            post_df = _dict_to_table(posts_dict)
            df = df.merge(post_df.rename(columns={"start": "post_start", "end": "post_end"}), left_index=True, right_index=True)
            df_cols = df_cols + ["post_start", "post_end"]
        df = df[df_cols]
        df.index.name = "Date"

        md["tradingPeriods"] = df

    return md
class ProgressBar:
def __init__(self, iterations, text='completed'):
self.text = text
@@ -702,7 +817,14 @@ class _KVStore:
with self._cache_mutex:
self.conn = _sqlite3.connect(filename, timeout=10, check_same_thread=False)
self.conn.execute('pragma journal_mode=wal')
self.conn.execute('create table if not exists "kv" (key TEXT primary key, value TEXT) without rowid')
try:
self.conn.execute('create table if not exists "kv" (key TEXT primary key, value TEXT) without rowid')
except Exception as e:
if 'near "without": syntax error' in str(e):
# "without rowid" requires sqlite 3.8.2. Older versions will raise exception
self.conn.execute('create table if not exists "kv" (key TEXT primary key, value TEXT)')
else:
raise
self.conn.commit()
_atexit.register(self.close)
@@ -743,8 +865,10 @@ class _TzCache:
"""Simple sqlite file cache of ticker->timezone"""
def __init__(self):
self._tz_db = None
self._setup_cache_folder()
# Must init db here, where is thread-safe
self._tz_db = _KVStore(_os.path.join(self._db_dir, "tkr-tz.db"))
self._migrate_cache_tkr_tz()
def _setup_cache_folder(self):
if not _os.path.isdir(self._db_dir):
@@ -776,11 +900,6 @@ class _TzCache:
@property
def tz_db(self):
# lazy init
if self._tz_db is None:
self._tz_db = _KVStore(_os.path.join(self._db_dir, "tkr-tz.db"))
self._migrate_cache_tkr_tz()
return self._tz_db
def _migrate_cache_tkr_tz(self):

View File

@@ -1 +1 @@
version = "0.2.0rc4"
version = "0.2.10b2"