Compare commits

...

143 Commits

Author SHA1 Message Date
ValueRaider
144efd3b08 Dev version 0.2.19b2 2023-05-11 13:52:41 +01:00
ValueRaider
80fc91ffa9 Merge pull request #1523 from ranaroussi/fix/price-fixes
Price fixes
2023-05-11 13:51:03 +01:00
ValueRaider
9821197fd1 Merge pull request #1522 from ranaroussi/fix/logging-messages
Improve logging messages
2023-05-11 13:50:45 +01:00
ValueRaider
45b5cac33b Improve logging messages
Improve logging messages related to price data fetches:
- fix 'debug is deprecated' msg
- append user args to 'may be delisted' msg - interval & dates/period
- improve formatting of 'cannot reconstruct' msg
- hide errors in 'history()' while accessing 'fast_info[]'
2023-05-10 14:47:58 +01:00
ValueRaider
d755b8c7ff Fix 'history()' edge cases
Fix merging prices & events if prices empty.
If user requested price repair, ensure 'Repaired?' column always present.
2023-05-10 14:44:50 +01:00
ValueRaider
ab1042b4c9 Dev version 0.2.19b1 2023-05-04 22:14:34 +01:00
ValueRaider
8172fc02d2 Merge pull request #1514 from ranaroussi/feature/optimise-history
Optimise Ticker.history() - up to 2x faster
2023-05-04 22:08:40 +01:00
ValueRaider
836082280b Merge branch 'dev' into feature/optimise-history 2023-05-04 22:08:28 +01:00
ValueRaider
6a98c2eda6 Merge pull request #1493 from ranaroussi/feature/error-reporting
Deprecate 'debug' arg, improve 'logging' use
2023-05-04 22:06:54 +01:00
ValueRaider
46f55c8983 Add debug logging to 'history()' ; Improve logger fmt 2023-05-04 22:04:39 +01:00
ValueRaider
b025fef22c Optimise Ticker.history() - up to 2x faster
format_history_metadata() is expensive. Improvements:
- only perform full formatting if user requests metadata
- when pruning prepost data, only format 'tradingPeriods' entry of metadata

Other small optimisations to several internal prices processing methods.

Speedups:
dat.history(period='1wk', interval='1h', prepost=True)  # 2x
dat.history(period='1mo', interval='1h', prepost=True)  # 1.46x
dat.history(period='1wk', interval='1h')  # 1.15x
dat.history(period='1mo', interval='1h')  # 1.13x
dat.history(period='1y', interval='1d')  # 1.36x
dat.history(period='5y', interval='1d')  # 1.13x
2023-04-30 00:35:08 +01:00
ValueRaider
e3778465d8 Merge branch 'dev' into feature/error-reporting 2023-04-22 16:02:56 +01:00
ValueRaider
f82177ea2e Improve download() logging - group errors & tracebacks for cleaner STDOUT 2023-04-16 21:57:04 +01:00
ValueRaider
142b1f3eb4 Merge pull request #1499 from ranaroussi/main
sync main -> dev
2023-04-16 19:08:50 +01:00
ValueRaider
afad7fcf0b Bump version to 0.2.18 2023-04-16 19:03:08 +01:00
ValueRaider
0baedbe4f5 Merge pull request #1498 from ranaroussi/hotfix/tz-cache-migrate-error
Fix handling Pandas parsing error during TZ-csv-cache migrate
2023-04-16 19:00:50 +01:00
ValueRaider
2c3c3dc8a9 Merge pull request #1496 from ranaroussi/hotfix/fast-info-np-not-found
Fix '_np not found', tweak 'info[] fixed' message
2023-04-16 18:59:38 +01:00
ValueRaider
8585dda77a Fix handling Pandas parsing error during TZ-csv-cache migrate 2023-04-16 15:09:28 +01:00
ValueRaider
3eb60fbd4a Fix '_np not found', tweak 'info[] fixed' message 2023-04-16 10:37:25 +01:00
ValueRaider
d3e2e71a6e Improve logging behaviour, particulary download()
- Use same logger across all files
- download():
  - write tracebacks to DEBUG
  - deprecate 'show_errors' argument
2023-04-15 17:29:07 +01:00
ValueRaider
4937c933a2 Deprecate 'debug' arg, improve 'logging' use 2023-04-15 16:47:39 +01:00
ValueRaider
045cd45893 Bump version to 0.2.17 2023-04-10 21:55:21 +01:00
ValueRaider
6d52cb6e3a Merge pull request #1488 from steven9909/fix_localize
Fix tzinfo missing attribute
2023-04-10 21:51:54 +01:00
steven9909
a24c0e1391 fix tzinfo missing attribute
tzinfo does not have a localize attribute so it is replaced with timestamp in UTC
2023-04-10 16:04:58 -04:00
ValueRaider
1e941fc86a Merge branch 'main' into dev 2023-04-09 23:45:37 +01:00
ValueRaider
0b52e8f118 Bump version to 0.2.16 2023-04-09 23:42:50 +01:00
ValueRaider
d45bed3d53 Fix 'fast_info deprecated' msg appearing at Ticker() init 2023-04-09 23:41:44 +01:00
ValueRaider
4152f7c897 Bump version to 0.2.15 2023-04-09 21:07:16 +01:00
ValueRaider
e7a3848f69 Merge pull request #1477 from ranaroussi/feature/price-repair-tweaks
Price repair: add 'Repaired?' column, and a bugfix
2023-04-09 21:01:49 +01:00
ValueRaider
fc4350e463 Merge pull request #1480 from kennykos/get_full_info
'info' fetch now gets same data as scrape
2023-04-09 21:01:34 +01:00
ValueRaider
13556afd90 README.md: reorganise & link to 'How to contribute' 2023-04-07 12:21:01 +01:00
ValueRaider
3d29ced428 Merge pull request #1474 from garrettladley/leverage-dict-and-list-comps
Leverage dict & list comprehensions in yfinance/tickers.py
2023-04-06 13:26:08 +01:00
Value Raider
6a63ce9e15 Demote 'fast_info'
Demote 'fast_info':
- inform user can revert to 'info'
- remove from README
- relocate class from base.py -> quote.py
2023-04-06 12:21:57 +01:00
garrettladley
2fe5a0a361 leveraged dict & list comps in yfinance/tickers.py 2023-04-05 18:55:47 -04:00
kennykos
63699a6aad 'info' fetch now gets same data as scrape
* Changed base url to "https://query2.finance.yahoo.com/v10/finance/quoteSummary"
* instead of just getting the quote, we now get
	* ```
	   items = ['summaryProfile', 'financialData', 'quoteType',
                    'defaultKeyStatistics', 'assetProfile', 'summaryDetail']
          ```
	which is the same as in the scrape function
2023-04-05 18:23:36 +01:00
Value Raider
a649b40dc9 Price repair: add 'Repaired?' column, and a bugfix
Price repair changes:
- if user requests price repair, add 'Repaired?' bool column showing what rows were repaired.
- fix price repair requesting <1d data beyond Yahoo's limit.
- fix logger messages
2023-04-03 21:27:04 +01:00
ValueRaider
a01edee4fa Merge pull request #1476 from ranaroussi/main
main -> dev
2023-04-03 21:20:50 +01:00
Value Raider
5367f62bd7 Bump version to 0.2.14 2023-03-25 11:39:21 +00:00
ValueRaider
27cb90c596 Merge pull request #1461 from qianyun210603/main
Add failback for decryption error in info interface
2023-03-25 11:33:27 +00:00
BookSword
6c2682654a Fetch 'info' dict via API 2023-03-24 18:04:07 +00:00
Value Raider
e89e190d11 Merge branch 'main' into dev 2023-03-21 19:05:56 +00:00
ValueRaider
a236270389 Merge pull request #1457 from ranaroussi/fix/price-fixes-various
Various fixes to price data processing
2023-03-21 18:59:13 +00:00
Value Raider
ef1205388c Bump version to 0.2.13 2023-03-21 18:56:32 +00:00
Value Raider
bb477989d4 Fix price-events merge when occurred pre-market 2023-03-21 18:52:35 +00:00
ValueRaider
478dc0a350 Merge pull request #1452 from ranaroussi/hotfix/prices-merge-events
Fix filtering events older than prices for merging
2023-03-21 18:16:29 +00:00
Value Raider
b5dca4941a Order history_metadata['tradingPeriods'] DF sensibly 2023-03-20 21:18:53 +00:00
Value Raider
6b71ba977c Various fixes to price data processing
- move drop-duplicates to before repair
- fix 'format_history_metadata()' processing 'regular' column
- fix Pandas & Numpy warnings
2023-03-20 21:10:45 +00:00
ValueRaider
195a7aa304 Merge pull request #1455 from mppics/fix/aggregate_capital_gains
Adding fix and test for aggregating Capital Gains
2023-03-18 17:24:53 +00:00
Matt Piccoli
a58d7456fe Adding fix and test for aggregating Capital Gains 2023-03-18 12:57:26 -04:00
ValueRaider
1edeaf07dc Merge pull request #1448 from ivan23kor/feature/clarify-end-argument
Clarify that interval is [start; end) in docstrings
2023-03-09 22:04:58 +00:00
Ivan Korostelev
7f04a9dcb6 Clarify that interval is [start; end) in docstrings 2023-03-09 14:27:21 -07:00
ValueRaider
7b95f554bd README: fix rate-limiting example 2023-02-21 12:24:35 +00:00
ValueRaider
6c70b866c7 Merge pull request #1423 from flaviovs/no-print
No print
2023-02-20 20:07:23 +00:00
Value Raider
bd696fb4db Beta version 0.2.13b1 2023-02-17 17:04:39 +00:00
Value Raider
d13aafa633 Replace more prints with logging, mostly in 'price repair' 2023-02-17 12:01:11 +00:00
Flávio Veloso Soares
00823f6fa6 Remove redundant logging text 2023-02-16 16:53:33 -08:00
Flávio Veloso Soares
21fdba9021 Replace warnings print() with warnings.warn(...) calls 2023-02-16 16:53:33 -08:00
Flávio Veloso Soares
972547ca8c Replace prints with logging module 2023-02-16 16:53:33 -08:00
ValueRaider
23b400f0fb Merge pull request #1421 from ranaroussi/fix/missing-price-history-errors
Improve handling missing price history
2023-02-16 14:22:10 +00:00
Value Raider
ca8c1c8cb4 Bump version to 0.2.12 2023-02-16 12:01:25 +00:00
ValueRaider
6b8b0d5c86 Merge pull request #1422 from ranaroussi/hotfix/disable-decrypt-fail-msg
Disable annoying 'backup decrypt' msg
2023-02-16 12:00:16 +00:00
Value Raider
952a04338f Disable annoying 'backup decrypt' msg 2023-02-15 16:46:55 +00:00
Value Raider
a1a385196b Improve handling missing price history
Fix fast_info[] dying if metadata incomplete/missing ; Price repair fix when no fine data available ; Fix _fix_unit_mixups() report
2023-02-14 17:31:14 +00:00
ValueRaider
62a442bd15 Update yahoo-keys.txt 2023-02-14 00:06:06 +00:00
ValueRaider
a0046439d1 Merge pull request #1400 from ranaroussi/feature/improve-performance
Optimise recent new features in `history`
2023-02-12 14:58:36 +00:00
ValueRaider
63a8476575 Merge pull request #1417 from ranaroussi/main
main -> dev
2023-02-12 14:56:19 +00:00
ValueRaider
e96f4f3cc0 Update yahoo-keys.txt 2023-02-12 09:57:25 +00:00
ValueRaider
cd5d0dfc3b Bump version to 0.2.11 2023-02-10 16:59:20 +00:00
ValueRaider
ece41cdb06 Merge pull request #1411 from sdeibel/main
Fix format_history_metadata for some symbols
2023-02-10 16:30:03 +00:00
ValueRaider
c362d54b1a Fix other metadata accesses + tests 2023-02-09 19:41:50 +00:00
Stephan Deibel
543e4fe582 Fix format_history_metadata for some symbols
Fix format_history_metadata when firstTradeDate is None, as is the case for QCSTIX and probably others.
2023-02-09 13:46:52 -05:00
ValueRaider
53fca7016e Bump version to 0.2.10 2023-02-07 22:05:17 +00:00
ValueRaider
4b6529c3a5 Merge pull request #1406 from ranaroussi/dev
dev -> main
2023-02-07 22:03:20 +00:00
ValueRaider
8957147926 Merge branch 'main' into dev 2023-02-07 22:02:46 +00:00
ValueRaider
4c7392ed17 Merge pull request #1403 from ranaroussi/fix/decrypt-keys
Fix decrypt keys
2023-02-07 21:55:33 +00:00
ValueRaider
0efda4f5af Fix filtering events older than prices for merging 2023-02-07 21:45:35 +00:00
ValueRaider
508de4aefb Dev version 0.2.10b3 2023-02-07 14:09:08 +00:00
ValueRaider
3d39992280 Add resilience to price repair
When calibrating price repair, use weighted average to estimate stock split ratio, is more resilient
2023-02-07 14:07:08 +00:00
ValueRaider
b462836540 Merge pull request #1385 from ranaroussi/fix/download-tz-behaviour
Restore original download() timezone handling
2023-02-07 13:16:03 +00:00
ValueRaider
2795660c28 Add a 5th backup key 2023-02-07 13:10:03 +00:00
ValueRaider
3dc87753ea Fix _get_decryption_keys_from_yahoo_js() returning '' 2023-02-07 13:09:49 +00:00
ValueRaider
645cc19037 Merge pull request #1379 from ranaroussi/feature/improve-decrypt
Add another backup decrypt option
2023-02-06 22:24:22 +00:00
ValueRaider
86d6acccf7 Fix dumb bugs in price repair - 1 more 2023-02-05 18:17:47 +00:00
ValueRaider
0f5db35b6e Optimise Ticker._reconstruct_intervals_batch() (slightly) 2023-02-05 18:16:08 +00:00
ValueRaider
7c6742a60a Optimise Ticker._fix_unit_mixups() 2023-02-05 15:15:56 +00:00
ValueRaider
4fa32a98ed Merge pull request #1397 from Matt-Seath/dev
Catch TypeError Exception
2023-02-05 13:49:48 +00:00
ValueRaider
36ace8017d Optimise Ticker._fix_zeroes() 2023-02-05 13:46:57 +00:00
Matt Seath
35f4071c0b Catch TypeError Exception
Addresses recent issue where calling Ticker.info would occasionally result in a TypeError Exception at line 287.
2023-02-05 11:49:40 +10:00
ValueRaider
ead0bce96e Optimise format_history_metadata() 2023-02-04 22:56:49 +00:00
ValueRaider
86b00091a9 Fix dumb bugs in price repair 2023-02-02 21:57:55 +00:00
ValueRaider
2a2928b4a0 Fix 'tradingPeriods' parsing when empty - 0.2.10b2 2023-02-01 13:31:54 +00:00
ValueRaider
d47133e5bf Dev version 0.2.10b1 2023-01-31 22:12:11 +00:00
ValueRaider
8f0c58dafa Dev version 0.2.10b0 2023-01-31 22:02:41 +00:00
ValueRaider
27a721c7dd Merge pull request #1380 from ranaroussi/fix/old-sqlite-error
Allow using sqlite3 < 3.8.2
2023-01-31 19:52:22 +00:00
ValueRaider
3e964d5319 Merge pull request #1383 from ranaroussi/fix/fast-info-prepost
Fix fast_info["previousClose"]
2023-01-31 19:51:46 +00:00
ValueRaider
84a31ae0b4 Merge pull request #1311 from ranaroussi/feature/prices-metadata-prune-prepost
Drop intraday intervals if in post-market but prepost=False
2023-01-31 19:50:00 +00:00
ValueRaider
891b533ec2 Drop intraday intervals if in prepost but prepost=False 2023-01-31 19:48:47 +00:00
ValueRaider
b9fb3e4979 Restore original download() tz handling: day/week/etc = ignore 2023-01-31 00:00:45 +00:00
ValueRaider
09342982a4 Add 'quoteType'. Improve handling tickers without trading 2023-01-30 23:53:06 +00:00
ValueRaider
da8c49011e fast_info: Fix previousClose & yearChange 2023-01-30 16:06:55 +00:00
ValueRaider
b805f0a010 Add another backup decrypt option 2023-01-29 23:09:45 +00:00
ValueRaider
5b0feb3d20 Fix tests 2023-01-29 16:53:26 +00:00
ValueRaider
ecbfc2957d bug_report: tighten language (again) 2023-01-29 13:58:02 +00:00
ValueRaider
e96248dec7 README: fix narrative ordering 2023-01-29 13:52:13 +00:00
ValueRaider
7d0045f03c README: simplify API overview with link to Wiki 2023-01-29 13:49:01 +00:00
ValueRaider
c3d7449844 Merge pull request #1289 from ranaroussi/fix/price-repair
Fix & improve price repair
2023-01-29 13:02:48 +00:00
ValueRaider
a4f11b0243 Fix price repair tests, remove unrelated changes 2023-01-29 13:01:54 +00:00
ValueRaider
1702fd0797 bug_report: tighten language 2023-01-29 00:54:27 +00:00
ValueRaider
464b3333d7 Allow using sqlite3 < 3.8.2 2023-01-29 00:34:46 +00:00
ValueRaider
685f2ec351 Merge branch 'dev' into fix/price-repair 2023-01-28 23:26:56 +00:00
ValueRaider
aad46baf28 price repair: Fix 'min_dt', add 'silent' mode 2023-01-28 23:14:28 +00:00
ValueRaider
a97db0aac6 README: add how-to for requests rate-limiting 2023-01-28 23:10:38 +00:00
ValueRaider
af5f96f97e Merge pull request #1368 from ranaroussi/fix/fast-info-camel-case
`fast_info` usability improvements
2023-01-28 22:28:42 +00:00
ValueRaider
a4bdaea888 fast_info: add camelCase, items() & values() 2023-01-28 22:27:51 +00:00
ValueRaider
ac5a9d2793 Merge pull request #1367 from ranaroussi/main
main -> dev
2023-01-27 22:09:59 +00:00
ValueRaider
b17ad32a47 Merge pull request #1366 from ranaroussi/doc/readme-explain-instability
README: comment on instability, tidy Ticker 'Quick start'
2023-01-27 18:31:32 +00:00
ValueRaider
af39855e28 README: comment on instability, tidy Ticker 'Quick start' 2023-01-27 17:36:25 +00:00
ValueRaider
ac6e047f0d Bump version to 0.2.9 2023-01-26 22:21:46 +00:00
ValueRaider
1e24337f29 Bump version to 0.2.8 2023-01-26 22:20:11 +00:00
ValueRaider
2cc82ae12f Merge pull request #1362 from ranaroussi/hotfix/fast-info-bugs
Ticker.fast_info: fix teething bugs
2023-01-26 22:03:06 +00:00
ValueRaider
d11f385049 Make fast_info JSON-serializable via toJSON() 2023-01-26 21:45:53 +00:00
ValueRaider
7377611e1f Add 'get(key, default)' to fast_info 2023-01-26 21:23:31 +00:00
ValueRaider
f3b5fb85c9 Remove exception raise from 'get_shares_full()' 2023-01-26 21:14:48 +00:00
ValueRaider
a4faef83ac 'fast_info' fixes: unusual symbols ; improve migration message ; 'regular_market_previous_close' 2023-01-26 21:02:18 +00:00
ValueRaider
e1184f745b Update yahoo-keys.txt 2023-01-26 17:06:03 +00:00
ValueRaider
fe630008e9 Bump version to 0.2.7 2023-01-26 17:03:00 +00:00
ValueRaider
b43072cf0a Merge pull request #1354 from ranaroussi/hotfix/rename-basic-info
Rename 'basic_info' -> 'fast_info'
2023-01-26 17:00:54 +00:00
ValueRaider
ad3f4cabc9 Improve 'get_shares_full()' error handling 2023-01-26 16:58:26 +00:00
ValueRaider
f70567872c Merge pull request #1353 from ranaroussi/hotfix/smart-decryption
Add decrypt key extraction from JS + GitHub backup
2023-01-26 16:44:23 +00:00
ValueRaider
a8ade72113 Rename 'basic_info' -> 'fast_info' ; Fix info tests 2023-01-26 16:36:25 +00:00
ValueRaider
1dcc8c9c8b Remove dead debug code 2023-01-26 14:57:15 +00:00
ValueRaider
dd5462b307 Add decrypt key extraction from JS + GitHub backup 2023-01-26 14:52:18 +00:00
ValueRaider
e39c03e8e3 Hardcode decrypt keys in GitHub for fix w/o PIP
`yfinance` will query this file via web request as a last resort. Avoids having to release a new PIP version just for a key update.
2023-01-26 14:20:03 +00:00
ValueRaider
9297504b84 Merge pull request #1346 from ranaroussi/main
main -> dev sync
2023-01-25 22:16:22 +00:00
ValueRaider
39c1ecc7a2 Improve price repair - reduce spam, improve data reliability
Extend 'reconstruct groups' to reduce Yahoo spam ; Extend fetch range to avoid first/last day irregularities ; Improve handling of 'max fetch days' Yahoo limit
2023-01-25 14:37:43 +00:00
ValueRaider
eb6d830e2a Fix repair volume=0 ; Tidy code 2023-01-21 23:00:30 +00:00
ValueRaider
2b0ae5a6c1 Remove 'repair_intervals' 2023-01-21 16:58:45 +00:00
ValueRaider
1636839b67 Handle request to reconstruct 1m 2023-01-20 00:13:28 +00:00
ValueRaider
65b97d024b Improve reporting 2023-01-20 00:13:02 +00:00
ValueRaider
197d2968e3 Add 'repair_intervals', rename 'repair'->'repair_prices' 2023-01-19 22:19:16 +00:00
ValueRaider
7460dbea17 If reconstructing 1d interval with 1h, always request prepost 2023-01-19 22:18:46 +00:00
ValueRaider
b49fd797fc Fix & improve price repair
Fix repair calibration & volume=0 repair ; Extend repair to sub-hour ; Avoid attempting repair of mostly-NaN days
2023-01-19 22:18:46 +00:00
ValueRaider
0ba810fda5 Improve 'history_metadata' formatting 2023-01-16 18:30:28 +00:00
19 changed files with 1802 additions and 874 deletions

View File

@@ -7,7 +7,9 @@ assignees: ''
---
# READ BEFORE POSTING
# IMPORTANT
If you want help, you must read this first and follow the instructions.
### Are you up-to-date?
@@ -23,20 +25,19 @@ and comparing against [PIP](https://pypi.org/project/yfinance/#history).
### Does Yahoo actually have the data?
Are spelling ticker *exactly* same as Yahoo?
Are you spelling ticker *exactly* same as Yahoo?
Visit `finance.yahoo.com` and confim they have your data. Maybe your ticker was delisted.
Then visit `finance.yahoo.com` and confirm they have the data you want. Maybe your ticker was delisted, or your expectations of `yfinance` are wrong.
### Are you spamming Yahoo?
Yahoo Finance free service has limit on query rate dependent on request - roughly 500/minute for prices, 10/minute for info. Them delaying or blocking your spam is not a bug.
Yahoo Finance free service has rate-limiting depending on request type - roughly 60/minute for prices, 10/minute for info. Once limit hit, Yahoo can delay, block, or return bad data. Not a `yfinance` bug.
### Still think it's a bug?
Delete this default message and submit your bug report here, providing the following as best you can:
Delete this default message (all of it) and submit your bug report here, providing the following as best you can:
- Simple code that reproduces your problem
- Error message, with traceback if shown
- Info about your system:
- yfinance version
- operating system
- Simple code that reproduces your problem, that we can copy-paste-run
- Exception message with full traceback, or proof `yfinance` returning bad data
- `yfinance` version and Python version
- Operating system type

View File

@@ -1,6 +1,75 @@
Change Log
===========
0.2.19b2 - beta
-------
Improve logging messages #1522
Price fixes #1523
0.2.19b1 - beta
-------
Optimise Ticker.history #1514
Logging module #1493
0.2.18
------
Fix 'fast_info' error '_np not found' #1496
Fix bug in timezone cache #1498
0.2.17
------
Fix prices error with Pandas 2.0 #1488
0.2.16
------
Fix 'fast_info deprecated' msg appearing at Ticker() init
0.2.15
------
Restore missing Ticker.info keys #1480
0.2.14
------
Fix Ticker.info dict by fetching from API #1461
0.2.13
------
Price bug fixes:
- fetch big-interval with Capital Gains #1455
- merging dividends & splits with prices #1452
0.2.12
------
Disable annoying 'backup decrypt' msg
0.2.11
------
Fix history_metadata accesses for unusual symbols #1411
0.2.10
------
General
- allow using sqlite3 < 3.8.2 #1380
- add another backup decrypt option #1379
Prices
- restore original download() timezone handling #1385
- fix & improve price repair #1289 2a2928b 86d6acc
- drop intraday intervals if in post-market but prepost=False #1311
Info
- fast_info improvements:
- add camelCase keys, add dict functions values() & items() #1368
- fix fast_info["previousClose"] #1383
- catch TypeError Exception #1397
0.2.9
-----
- Fix fast_info bugs #1362
0.2.7
-----
- Fix Yahoo decryption, smarter this time #1353
- Rename basic_info -> fast_info #1354
0.2.6
-----
- Fix Ticker.basic_info lazy-loading #1342

166
README.md
View File

@@ -42,12 +42,10 @@ Yahoo! finance API is intended for personal use only.**
---
## What's new in version 0.2
## News [2023-01-27]
Since December 2022 Yahoo has been encrypting the web data that `yfinance` scrapes for non-market data. Fortunately the decryption keys are available, although Yahoo moved/changed them several times hence `yfinance` breaking several times. `yfinance` is now better prepared for any future changes by Yahoo.
- Optimised web scraping
- All 3 financials tables now match website so expect keys to change. If you really want old tables, use [`Ticker.get_[income_stmt|balance_sheet|cashflow](legacy=True, ...)`](https://github.com/ranaroussi/yfinance/blob/85783da515761a145411d742c2a8a3c1517264b0/yfinance/base.py#L968)
- price data improvements: fix bug NaN rows with dividend; new repair feature for missing or 100x prices `download(repair=True)`; new attribute `Ticker.history_metadata`
[See release notes for full list of changes](https://github.com/ranaroussi/yfinance/releases/tag/0.2.1)
Why is Yahoo doing this? We don't know. Is it to stop scrapers? Maybe, so we've implemented changes to reduce load on Yahoo. In December we rolled out version 0.2 with optimised scraping. ~Then in 0.2.6 introduced `Ticker.fast_info`, providing much faster access to some `info` elements wherever possible e.g. price stats and forcing users to switch (sorry but we think necessary). `info` will continue to exist for as long as there are elements without a fast alternative.~ `info` now fixed and much faster than before.
## Quick Start
@@ -60,33 +58,26 @@ import yfinance as yf
msft = yf.Ticker("MSFT")
# fast access to subset of stock info
msft.basic_info
# slow access to all stock info
# get all stock info
msft.info
# get historical market data
hist = msft.history(period="max")
hist = msft.history(period="1mo")
# show meta information about the history (requires history() to be called first)
msft.history_metadata
# show actions (dividends, splits, capital gains)
msft.actions
# show dividends
msft.dividends
# show splits
msft.splits
# show capital gains (for mutual funds & etfs)
msft.capital_gains
msft.capital_gains # only for mutual funds & etfs
# show share count
# - yearly summary:
msft.shares
msft.get_shares_full()
# - accurate time-series count:
msft.get_shares_full(start="2022-01-01", end=None)
# show financials:
# - income statement
@@ -100,13 +91,9 @@ msft.cashflow
msft.quarterly_cashflow
# see `Ticker.get_income_stmt()` for more options
# show major holders
# show holders
msft.major_holders
# show institutional holders
msft.institutional_holders
# show mutualfund holders
msft.mutualfund_holders
# show earnings
@@ -165,18 +152,7 @@ msft.option_chain(..., proxy="PROXY_SERVER")
...
```
To use a custom `requests` session (for example to cache calls to the
API or customize the `User-agent` header), pass a `session=` argument to
the Ticker constructor.
```python
import requests_cache
session = requests_cache.CachedSession('yfinance.cache')
session.headers['User-agent'] = 'my-program/1.0'
ticker = yf.Ticker('msft', session=session)
# The scraped response will be stored in the cache
ticker.actions
```
### Multiple tickers
To initialize multiple `Ticker` objects, use
@@ -191,69 +167,65 @@ tickers.tickers['AAPL'].history(period="1mo")
tickers.tickers['GOOG'].actions
```
### Fetching data for multiple tickers
To download price history into one table:
```python
import yfinance as yf
data = yf.download("SPY AAPL", start="2017-01-01", end="2017-04-30")
```
I've also added some options to make life easier :)
`yf.download()` and `Ticker.history()` have many options for configuring fetching and processing, e.g.:
```python
data = yf.download( # or pdr.get_data_yahoo(...
# tickers list or string as well
tickers = "SPY AAPL MSFT",
# use "period" instead of start/end
# valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max
# (optional, default is '1mo')
period = "ytd",
# fetch data by interval (including intraday if period < 60 days)
# valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
# (optional, default is '1d')
interval = "5d",
# Whether to ignore timezone when aligning ticker data from
# different timezones. Default is False.
ignore_tz = False,
# group by ticker (to access via data['SPY'])
# (optional, default is 'column')
group_by = 'ticker',
# adjust all OHLC automatically
# (optional, default is False)
auto_adjust = True,
# attempt repair of missing data or currency mixups e.g. $/cents
repair = False,
# download pre/post regular market hours data
# (optional, default is False)
prepost = True,
# use threads for mass downloading? (True/False/Integer)
# (optional, default is True)
threads = True,
# proxy URL scheme use use when downloading?
# (optional, default is None)
proxy = None
)
yf.download(tickers = "SPY AAPL", # list of tickers
period = "1y", # time period
interval = "1d", # trading interval
prepost = False, # download pre/post market hours data?
repair = True) # repair obvious price errors e.g. 100x?
```
### Timezone cache store
Review the [Wiki](https://github.com/ranaroussi/yfinance/wiki) for more options and detail.
### Logging
`yfinance` now uses the `logging` module. To control the detail of printed messages you simply change the level:
```
import logging
logger = logging.getLogger('yfinance')
logger.setLevel(logging.ERROR) # default: only print errors
logger.setLevel(logging.CRITICAL) # disable printing
logger.setLevel(logging.DEBUG) # verbose: print errors & debug info
```
### Smarter scraping
To use a custom `requests` session (for example to cache calls to the
API or customize the `User-agent` header), pass a `session=` argument to
the Ticker constructor.
When fetching price data, all dates are localized to stock exchange timezone.
But timezone retrieval is relatively slow, so yfinance attemps to cache them
in your users cache folder.
You can direct cache to use a different location with `set_tz_cache_location()`:
```python
import yfinance as yf
yf.set_tz_cache_location("custom/cache/location")
...
import requests_cache
session = requests_cache.CachedSession('yfinance.cache')
session.headers['User-agent'] = 'my-program/1.0'
ticker = yf.Ticker('msft', session=session)
# The scraped response will be stored in the cache
ticker.actions
```
Combine a `requests_cache` with rate-limiting to avoid triggering Yahoo's rate-limiter/blocker that can corrupt data.
```python
from requests import Session
from requests_cache import CacheMixin, SQLiteCache
from requests_ratelimiter import LimiterMixin, MemoryQueueBucket
from pyrate_limiter import Duration, RequestRate, Limiter
class CachedLimiterSession(CacheMixin, LimiterMixin, Session):
pass
session = CachedLimiterSession(
limiter=Limiter(RequestRate(2, Duration.SECOND*5)), # max 2 requests per 5 seconds
bucket_class=MemoryQueueBucket,
backend=SQLiteCache("yfinance.cache"),
)
```
### Managing Multi-Level Columns
@@ -271,9 +243,7 @@ yfinance?](https://stackoverflow.com/questions/63107801)
- How to download single or multiple tickers into a single
dataframe with single level column names and a ticker column
---
## `pandas_datareader` override
### `pandas_datareader` override
If your code uses `pandas_datareader` and you want to download data
faster, you can "hijack" `pandas_datareader.data.get_data_yahoo()`
@@ -290,6 +260,18 @@ yf.pdr_override() # <== that's all it takes :-)
data = pdr.get_data_yahoo("SPY", start="2017-01-01", end="2017-04-30")
```
### Timezone cache store
When fetching price data, all dates are localized to stock exchange timezone.
But timezone retrieval is relatively slow, so yfinance attempts to cache them
in your user's cache folder.
You can direct cache to use a different location with `set_tz_cache_location()`:
```python
import yfinance as yf
yf.set_tz_cache_location("custom/cache/location")
...
```
---
## Installation
@@ -317,11 +299,15 @@ To install `yfinance` using `conda`, see
- [html5lib](https://pypi.org/project/html5lib) \>= 1.1
- [cryptography](https://pypi.org/project/cryptography) \>= 3.3.2
### Optional (if you want to use `pandas_datareader`)
#### Optional (if you want to use `pandas_datareader`)
- [pandas\_datareader](https://github.com/pydata/pandas-datareader)
\>= 0.4.0
## Developers: want to contribute?
`yfinance` relies on the community to investigate bugs and contribute code. Developer guide: https://github.com/ranaroussi/yfinance/discussions/1084
---
### Legal Stuff

View File

@@ -1,5 +1,5 @@
{% set name = "yfinance" %}
{% set version = "0.2.6" %}
{% set version = "0.2.19b2" %}
package:
name: "{{ name|lower }}"

View File

@@ -15,6 +15,9 @@ Sanity check for most common library uses all working
import yfinance as yf
import unittest
import logging
logging.basicConfig(level=logging.DEBUG)
symbols = ['MSFT', 'IWO', 'VFINX', '^GSPC', 'BTC-USD']
tickers = [yf.Ticker(symbol) for symbol in symbols]

View File

@@ -24,9 +24,7 @@ class TestPriceHistory(unittest.TestCase):
def test_daily_index(self):
tkrs = ["BHP.AX", "IMP.JO", "BP.L", "PNL.L", "INTC"]
intervals = ["1d", "1wk", "1mo"]
for tkr in tkrs:
dat = yf.Ticker(tkr, session=self.session)
@@ -44,8 +42,8 @@ class TestPriceHistory(unittest.TestCase):
dt_utc = _tz.timezone("UTC").localize(_dt.datetime.utcnow())
dt = dt_utc.astimezone(_tz.timezone(tz))
df = dat.history(start=dt.date() - _dt.timedelta(days=1), interval="1h")
start_d = dt.date() - _dt.timedelta(days=7)
df = dat.history(start=start_d, interval="1h")
dt0 = df.index[-2]
dt1 = df.index[-1]
@@ -55,7 +53,6 @@ class TestPriceHistory(unittest.TestCase):
print("Ticker = ", tkr)
raise
def test_duplicatingDaily(self):
tkrs = ["IMP.JO", "BHG.JO", "SSW.JO", "BP.L", "INTC"]
test_run = False
@@ -110,22 +107,27 @@ class TestPriceHistory(unittest.TestCase):
def test_intraDayWithEvents(self):
# TASE dividend release pre-market, doesn't merge nicely with intra-day data so check still present
tkr = "ICL.TA"
# tkr = "ESLT.TA"
# tkr = "ONE.TA"
# tkr = "MGDL.TA"
start_d = _dt.date.today() - _dt.timedelta(days=60)
end_d = None
df_daily = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1d", actions=True)
df_daily_divs = df_daily["Dividends"][df_daily["Dividends"] != 0]
if df_daily_divs.shape[0] == 0:
self.skipTest("Skipping test_intraDayWithEvents() because 'ICL.TA' has no dividend in last 60 days")
tase_tkrs = ["ICL.TA", "ESLT.TA", "ONE.TA", "MGDL.TA"]
test_run = False
for tkr in tase_tkrs:
start_d = _dt.date.today() - _dt.timedelta(days=59)
end_d = None
df_daily = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1d", actions=True)
df_daily_divs = df_daily["Dividends"][df_daily["Dividends"] != 0]
if df_daily_divs.shape[0] == 0:
# self.skipTest("Skipping test_intraDayWithEvents() because 'ICL.TA' has no dividend in last 60 days")
continue
last_div_date = df_daily_divs.index[-1]
start_d = last_div_date.date()
end_d = last_div_date.date() + _dt.timedelta(days=1)
df = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="15m", actions=True)
self.assertTrue((df["Dividends"] != 0.0).any())
last_div_date = df_daily_divs.index[-1]
start_d = last_div_date.date()
end_d = last_div_date.date() + _dt.timedelta(days=1)
df = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="15m", actions=True)
self.assertTrue((df["Dividends"] != 0.0).any())
test_run = True
break
if not test_run:
self.skipTest("Skipping test_intraDayWithEvents() because no tickers had a dividend in last 60 days")
def test_dailyWithEvents(self):
# Reproduce issue #521
@@ -228,9 +230,13 @@ class TestPriceHistory(unittest.TestCase):
print("{}-without-events missing these dates: {}".format(tkr, missing_from_df2))
raise
def test_monthlyWithEvents2(self):
    # Simply check no exception from internal merge.
    # Nothing is asserted on the returned data; the test passes if history() returns.
    tkr = "ABBV"
    # Use 'tkr' (previously assigned but ignored) and the shared class session,
    # consistent with the other tests in this class.
    yf.Ticker(tkr, session=self.session).history(period="max", interval="1mo")
def test_tz_dst_ambiguous(self):
# Reproduce issue #1100
try:
yf.Ticker("ESLT.TA", session=self.session).history(start="2002-10-06", end="2002-10-09", interval="1d")
except _tz.exceptions.AmbiguousTimeError:
@@ -261,6 +267,116 @@ class TestPriceHistory(unittest.TestCase):
print("Weekly data not aligned to Monday")
raise
def test_prune_post_intraday_us(self):
    # Half-day before USA Thanksgiving. Yahoo normally
    # returns an interval starting when regular trading closes,
    # even if prepost=False.

    # Setup
    symbol = "AMZN"
    interval = "1h"
    step = _dt.timedelta(hours=1)
    regular_open = _dt.time(9, 30)
    regular_close = _dt.time(16)
    half_day = _dt.date(2022, 11, 25)
    half_day_close = _dt.time(13)
    dat = yf.Ticker(symbol, session=self.session)

    # Run: fetch one week either side of the half-day
    one_week = _dt.timedelta(days=7)
    df = dat.history(start=half_day - one_week, end=half_day + one_week,
                     interval=interval, prepost=False, keepna=True)
    last_dt_on_half_day = df.loc[str(half_day)].index[-1]
    self.assertTrue(last_dt_on_half_day.time() < half_day_close)

    # Test no other afternoons (or mornings) were pruned, over the whole calendar year
    year_start = _dt.date(half_day.year, 1, 1)
    year_end = _dt.date(half_day.year + 1, 1, 1)
    df = dat.history(start=year_start, end=year_end, interval=interval, prepost=False, keepna=True)

    # Last interval of each day: adding one interval must reach the regular close
    day_last = _pd.Series(df.index).groupby(df.index.date).last()
    closes_early = (day_last + step).dt.time < regular_close
    early_close_dates = day_last.index[closes_early].values
    self.assertEqual(len(early_close_dates), 1)
    self.assertEqual(early_close_dates[0], half_day)

    # First interval of each day: must not start after the regular open
    day_first = _pd.Series(df.index).groupby(df.index.date).first()
    opens_late = day_first.dt.time > regular_open
    late_open_dates = day_first.index[opens_late]
    self.assertEqual(len(late_open_dates), 0)
def test_prune_post_intraday_omx(self):
    # Half-day before Sweden Christmas. Yahoo normally
    # returns an interval starting when regular trading closes,
    # even if prepost=False.
    # If prepost=False, test that yfinance is removing prepost intervals.

    # Setup
    symbol = "AEC.ST"
    interval = "1h"
    step = _dt.timedelta(hours=1)
    regular_open = _dt.time(9)
    regular_close = _dt.time(17, 30)
    xmas_half_day = _dt.date(2022, 12, 23)
    half_day_close = _dt.time(13, 2)
    dat = yf.Ticker(symbol, session=self.session)

    # Half trading day Jan 5, Apr 14, May 25, Jun 23, Nov 4, Dec 23, Dec 30
    half_day_month_days = [(1, 5), (4, 14), (5, 25), (6, 23), (11, 4), (12, 23), (12, 30)]
    half_days = [_dt.date(xmas_half_day.year, month, day) for month, day in half_day_month_days]
    # Yahoo has incorrectly classified afternoon of 2022-04-13 as post-market.
    # Nothing yfinance can do because Yahoo doesn't return data with prepost=False.
    # But need to handle in this test.
    expected_incorrect_half_days = [_dt.date(2022, 4, 13)]
    half_days = sorted(half_days + expected_incorrect_half_days)

    # Run: fetch one week either side of the half-day
    one_week = _dt.timedelta(days=7)
    df = dat.history(start=xmas_half_day - one_week, end=xmas_half_day + one_week,
                     interval=interval, prepost=False, keepna=True)
    last_dt_on_half_day = df.loc[str(xmas_half_day)].index[-1]
    self.assertTrue(last_dt_on_half_day.time() < half_day_close)

    # Test no other afternoons (or mornings) were pruned, over the whole calendar year
    year_start = _dt.date(xmas_half_day.year, 1, 1)
    year_end = _dt.date(xmas_half_day.year + 1, 1, 1)
    df = dat.history(start=year_start, end=year_end, interval=interval, prepost=False, keepna=True)

    # Early closes must be exactly the known half-days (plus Yahoo's misclassified day)
    day_last = _pd.Series(df.index).groupby(df.index.date).last()
    closes_early = (day_last + step).dt.time < regular_close
    early_close_dates = day_last.index[closes_early].values
    unexpected_early_close_dates = [d for d in early_close_dates if d not in half_days]
    self.assertEqual(len(unexpected_early_close_dates), 0)
    self.assertEqual(len(early_close_dates), len(half_days))
    self.assertTrue(_np.equal(early_close_dates, half_days).all())

    # First interval of each day: must not start after the regular open
    day_first = _pd.Series(df.index).groupby(df.index.date).first()
    opens_late = day_first.dt.time > regular_open
    late_open_dates = day_first.index[opens_late]
    self.assertEqual(len(late_open_dates), 0)
def test_prune_post_intraday_asx(self):
    # Setup
    symbol = "BHP.AX"
    interval = "1h"
    step = _dt.timedelta(hours=1)
    regular_open = _dt.time(10)
    regular_close = _dt.time(16, 12)
    # No early closes in 2022
    dat = yf.Ticker(symbol, session=self.session)

    # Test no afternoons (or mornings) were pruned
    year_start = _dt.date(2022, 1, 1)
    year_end = _dt.date(2022 + 1, 1, 1)
    df = dat.history(start=year_start, end=year_end, interval=interval, prepost=False, keepna=True)

    # Last interval of each day: adding one interval must reach the regular close
    day_last = _pd.Series(df.index).groupby(df.index.date).last()
    closes_early = (day_last + step).dt.time < regular_close
    early_close_dates = day_last.index[closes_early].values
    self.assertEqual(len(early_close_dates), 0)

    # First interval of each day: must not start after the regular open
    day_first = _pd.Series(df.index).groupby(df.index.date).first()
    opens_late = day_first.dt.time > regular_open
    late_open_dates = day_first.index[opens_late]
    self.assertEqual(len(late_open_dates), 0)
def test_weekly_2rows_fix(self):
tkr = "AMZN"
start = _dt.date.today() - _dt.timedelta(days=14)
@@ -270,11 +386,53 @@ class TestPriceHistory(unittest.TestCase):
df = dat.history(start=start, interval="1wk")
self.assertTrue((df.index.weekday == 0).all())
def test_aggregate_capital_gains(self):
    # Only checks that the fetch completes; nothing is asserted on the result.

    # Setup
    fetch_args = dict(start="2017-12-31", end="2019-12-31", interval="3mo")
    dat = yf.Ticker("FXAIX", session=self.session)

    # Run
    dat.history(**fetch_args)
class TestPriceRepair(unittest.TestCase):
session = None
@classmethod
def setUpClass(cls):
    # Create one in-memory cached session and store it on the class
    # so the tests can pass it to yf.Ticker via self.session.
    cached_session = requests_cache.CachedSession(backend='memory')
    cls.session = cached_session
@classmethod
def tearDownClass(cls):
    # Nothing to release if the session was never created
    if cls.session is None:
        return
    cls.session.close()
def test_reconstruct_2m(self):
    # 2m repair requires 1m data.
    # Yahoo restricts 1m fetches to 7 days max within last 30 days.
    # Need to test that '_reconstruct_intervals_batch()' can handle this.
    # Nothing is asserted on the data: the test passes if no fetch raises.
    tkrs = ["BHP.AX", "IMP.JO", "BP.L", "PNL.L", "INTC"]

    # Round time up to the hour for 'requests_cache' reuse.
    # Timestamp.now("UTC") is the non-deprecated equivalent of Timestamp.utcnow().
    end_dt = _pd.Timestamp.now("UTC").ceil("1h")
    # The date range is the same for every ticker, so compute it once
    start_dt = end_dt - _dt.timedelta(days=60)

    for tkr in tkrs:
        dat = yf.Ticker(tkr, session=self.session)
        dat.history(start=start_dt, end=end_dt, interval="2m", repair=True)
def test_repair_100x_weekly(self):
# Setup:
tkr = "PNL.L"
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.info["exchangeTimezoneName"]
tz_exchange = dat.fast_info["timezone"]
data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
df = _pd.DataFrame(data={"Open": [470.5, 473.5, 474.5, 470],
@@ -283,22 +441,22 @@ class TestPriceHistory(unittest.TestCase):
"Close": [475, 473.5, 472, 473.5],
"Adj Close": [475, 473.5, 472, 473.5],
"Volume": [2295613, 2245604, 3000287, 2635611]},
index=_pd.to_datetime([_dt.date(2022, 10, 23),
_dt.date(2022, 10, 16),
_dt.date(2022, 10, 9),
_dt.date(2022, 10, 2)]))
index=_pd.to_datetime([_dt.date(2022, 10, 24),
_dt.date(2022, 10, 17),
_dt.date(2022, 10, 10),
_dt.date(2022, 10, 3)]))
df = df.sort_index()
df.index.name = "Date"
df_bad = df.copy()
df_bad.loc["2022-10-23", "Close"] *= 100
df_bad.loc["2022-10-16", "Low"] *= 100
df_bad.loc["2022-10-2", "Open"] *= 100
df_bad.loc["2022-10-24", "Close"] *= 100
df_bad.loc["2022-10-17", "Low"] *= 100
df_bad.loc["2022-10-03", "Open"] *= 100
df.index = df.index.tz_localize(tz_exchange)
df_bad.index = df_bad.index.tz_localize(tz_exchange)
# Run test
df_repaired = dat._fix_unit_mixups(df_bad, "1wk", tz_exchange)
df_repaired = dat._fix_unit_mixups(df_bad, "1wk", tz_exchange, prepost=False)
# First test - no errors left
for c in data_cols:
@@ -321,12 +479,15 @@ class TestPriceHistory(unittest.TestCase):
f_1 = ratio == 1
self.assertTrue((f_100 | f_1).all())
self.assertTrue("Repaired?" in df_repaired.columns)
self.assertFalse(df_repaired["Repaired?"].isna().any())
def test_repair_100x_weekly_preSplit(self):
# PNL.L has a stock-split in 2022. Sometimes requesting data before 2022 is not split-adjusted.
tkr = "PNL.L"
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.info["exchangeTimezoneName"]
tz_exchange = dat.fast_info["timezone"]
data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
df = _pd.DataFrame(data={"Open": [400, 398, 392.5, 417],
@@ -353,7 +514,7 @@ class TestPriceHistory(unittest.TestCase):
df.index = df.index.tz_localize(tz_exchange)
df_bad.index = df_bad.index.tz_localize(tz_exchange)
df_repaired = dat._fix_unit_mixups(df_bad, "1wk", tz_exchange)
df_repaired = dat._fix_unit_mixups(df_bad, "1wk", tz_exchange, prepost=False)
# First test - no errors left
for c in data_cols:
@@ -378,10 +539,13 @@ class TestPriceHistory(unittest.TestCase):
f_1 = ratio == 1
self.assertTrue((f_100 | f_1).all())
self.assertTrue("Repaired?" in df_repaired.columns)
self.assertFalse(df_repaired["Repaired?"].isna().any())
def test_repair_100x_daily(self):
tkr = "PNL.L"
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.info["exchangeTimezoneName"]
tz_exchange = dat.fast_info["timezone"]
data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
df = _pd.DataFrame(data={"Open": [478, 476, 476, 472],
@@ -403,7 +567,7 @@ class TestPriceHistory(unittest.TestCase):
df.index = df.index.tz_localize(tz_exchange)
df_bad.index = df_bad.index.tz_localize(tz_exchange)
df_repaired = dat._fix_unit_mixups(df_bad, "1d", tz_exchange)
df_repaired = dat._fix_unit_mixups(df_bad, "1d", tz_exchange, prepost=False)
# First test - no errors left
for c in data_cols:
@@ -420,10 +584,13 @@ class TestPriceHistory(unittest.TestCase):
f_1 = ratio == 1
self.assertTrue((f_100 | f_1).all())
self.assertTrue("Repaired?" in df_repaired.columns)
self.assertFalse(df_repaired["Repaired?"].isna().any())
def test_repair_zeroes_daily(self):
tkr = "BBIL.L"
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.info["exchangeTimezoneName"]
tz_exchange = dat.fast_info["timezone"]
df_bad = _pd.DataFrame(data={"Open": [0, 102.04, 102.04],
"High": [0, 102.1, 102.11],
@@ -438,7 +605,7 @@ class TestPriceHistory(unittest.TestCase):
df_bad.index.name = "Date"
df_bad.index = df_bad.index.tz_localize(tz_exchange)
repaired_df = dat._fix_zeroes(df_bad, "1d", tz_exchange)
repaired_df = dat._fix_zeroes(df_bad, "1d", tz_exchange, prepost=False)
correct_df = df_bad.copy()
correct_df.loc["2022-11-01", "Open"] = 102.080002
@@ -447,46 +614,43 @@ class TestPriceHistory(unittest.TestCase):
for c in ["Open", "Low", "High", "Close"]:
self.assertTrue(_np.isclose(repaired_df[c], correct_df[c], rtol=1e-8).all())
self.assertTrue("Repaired?" in repaired_df.columns)
self.assertFalse(repaired_df["Repaired?"].isna().any())
def test_repair_zeroes_hourly(self):
tkr = "INTC"
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.info["exchangeTimezoneName"]
tz_exchange = dat.fast_info["timezone"]
df_bad = _pd.DataFrame(data={"Open": [29.68, 29.49, 29.545, _np.nan, 29.485],
"High": [29.68, 29.625, 29.58, _np.nan, 29.49],
"Low": [29.46, 29.4, 29.45, _np.nan, 29.31],
"Close": [29.485, 29.545, 29.485, _np.nan, 29.325],
"Adj Close": [29.485, 29.545, 29.485, _np.nan, 29.325],
"Volume": [3258528, 2140195, 1621010, 0, 0]},
index=_pd.to_datetime([_dt.datetime(2022,11,25, 9,30),
_dt.datetime(2022,11,25, 10,30),
_dt.datetime(2022,11,25, 11,30),
_dt.datetime(2022,11,25, 12,30),
_dt.datetime(2022,11,25, 13,00)]))
df_bad = df_bad.sort_index()
df_bad.index.name = "Date"
df_bad.index = df_bad.index.tz_localize(tz_exchange)
correct_df = dat.history(period="1wk", interval="1h", auto_adjust=False, repair=True)
repaired_df = dat._fix_zeroes(df_bad, "1h", tz_exchange)
df_bad = correct_df.copy()
bad_idx = correct_df.index[10]
df_bad.loc[bad_idx, "Open"] = _np.nan
df_bad.loc[bad_idx, "High"] = _np.nan
df_bad.loc[bad_idx, "Low"] = _np.nan
df_bad.loc[bad_idx, "Close"] = _np.nan
df_bad.loc[bad_idx, "Adj Close"] = _np.nan
df_bad.loc[bad_idx, "Volume"] = 0
repaired_df = dat._fix_zeroes(df_bad, "1h", tz_exchange, prepost=False)
correct_df = df_bad.copy()
idx = _pd.Timestamp(2022,11,25, 12,30).tz_localize(tz_exchange)
correct_df.loc[idx, "Open"] = 29.485001
correct_df.loc[idx, "High"] = 29.49
correct_df.loc[idx, "Low"] = 29.43
correct_df.loc[idx, "Close"] = 29.455
correct_df.loc[idx, "Adj Close"] = 29.455
correct_df.loc[idx, "Volume"] = 609164
for c in ["Open", "Low", "High", "Close"]:
try:
self.assertTrue(_np.isclose(repaired_df[c], correct_df[c], rtol=1e-7).all())
except:
print("COLUMN", c)
print("- repaired_df")
print(repaired_df)
print("- correct_df[c]:")
print(correct_df[c])
print("- diff:")
print(repaired_df[c] - correct_df[c])
raise
self.assertTrue("Repaired?" in repaired_df.columns)
self.assertFalse(repaired_df["Repaired?"].isna().any())
if __name__ == '__main__':
unittest.main()

View File

@@ -52,12 +52,16 @@ class TestTicker(unittest.TestCase):
def test_badTicker(self):
# Check yfinance doesn't die when ticker delisted
tkr = "AM2Z.TA"
tkr = "DJI" # typo of "^DJI"
dat = yf.Ticker(tkr, session=self.session)
dat.history(period="1wk")
dat.history(start="2022-01-01")
dat.history(start="2022-01-01", end="2022-03-01")
yf.download([tkr], period="1wk")
for k in dat.fast_info:
dat.fast_info[k]
dat.isin
dat.major_holders
dat.institutional_holders
@@ -91,43 +95,48 @@ class TestTicker(unittest.TestCase):
def test_goodTicker(self):
# that yfinance works when full api is called on same instance of ticker
tkr = "IBM"
dat = yf.Ticker(tkr, session=self.session)
tkrs = ["IBM"]
tkrs.append("QCSTIX") # weird ticker, no price history but has previous close
for tkr in tkrs:
dat = yf.Ticker(tkr, session=self.session)
dat.isin
dat.major_holders
dat.institutional_holders
dat.mutualfund_holders
dat.dividends
dat.splits
dat.actions
dat.shares
dat.get_shares_full()
dat.info
dat.calendar
dat.recommendations
dat.earnings
dat.quarterly_earnings
dat.income_stmt
dat.quarterly_income_stmt
dat.balance_sheet
dat.quarterly_balance_sheet
dat.cashflow
dat.quarterly_cashflow
dat.recommendations_summary
dat.analyst_price_target
dat.revenue_forecasts
dat.sustainability
dat.options
dat.news
dat.earnings_trend
dat.earnings_dates
dat.earnings_forecasts
dat.history(period="1wk")
dat.history(start="2022-01-01")
dat.history(start="2022-01-01", end="2022-03-01")
yf.download([tkr], period="1wk")
dat.history(period="1wk")
dat.history(start="2022-01-01")
dat.history(start="2022-01-01", end="2022-03-01")
yf.download([tkr], period="1wk")
for k in dat.fast_info:
dat.fast_info[k]
dat.isin
dat.major_holders
dat.institutional_holders
dat.mutualfund_holders
dat.dividends
dat.splits
dat.actions
dat.shares
dat.get_shares_full()
dat.info
dat.calendar
dat.recommendations
dat.earnings
dat.quarterly_earnings
dat.income_stmt
dat.quarterly_income_stmt
dat.balance_sheet
dat.quarterly_balance_sheet
dat.cashflow
dat.quarterly_cashflow
dat.recommendations_summary
dat.analyst_price_target
dat.revenue_forecasts
dat.sustainability
dat.options
dat.news
dat.earnings_trend
dat.earnings_dates
dat.earnings_forecasts
class TestTickerHistory(unittest.TestCase):
@@ -678,39 +687,50 @@ class TestTickerInfo(unittest.TestCase):
cls.session.close()
def setUp(self):
tkrs = ["ESLT.TA", "BP.L", "GOOGL"]
self.tickers = [yf.Ticker(tkr, session=self.session) for tkr in tkrs]
self.symbols = []
self.symbols += ["ESLT.TA", "BP.L", "GOOGL"]
self.symbols.append("QCSTIX") # good for testing, doesn't trade
self.symbols += ["BTC-USD", "IWO", "VFINX", "^GSPC"]
self.symbols += ["SOKE.IS", "ADS.DE"] # detected bugs
self.tickers = [yf.Ticker(s, session=self.session) for s in self.symbols]
def tearDown(self):
self.ticker = None
def test_info(self):
data = self.ticker.info
data = self.tickers[0].info
self.assertIsInstance(data, dict, "data has wrong type")
self.assertIn("symbol", data.keys(), "Did not find expected key in info dict")
self.assertEqual("GOOGL", data["symbol"], "Wrong symbol value in info dict")
self.assertEqual(self.symbols[0], data["symbol"], "Wrong symbol value in info dict")
def test_basic_info(self):
def test_fast_info(self):
    # Every key exposed by fast_info must resolve to a non-None value
    fast_info = yf.Ticker("AAPL", session=self.session).fast_info
    for key in fast_info:
        value = fast_info[key]
        self.assertIsNotNone(value)
def test_fast_info_matches_info(self):
yf.scrapers.quote.PRUNE_INFO = False
# basic_info_keys = self.ticker.basic_info.keys()
basic_info_keys = set()
fast_info_keys = set()
for ticker in self.tickers:
basic_info_keys.update(set(ticker.basic_info.keys()))
basic_info_keys = sorted(list(basic_info_keys))
fast_info_keys.update(set(ticker.fast_info.keys()))
fast_info_keys = sorted(list(fast_info_keys))
key_rename_map = {}
key_rename_map["currency"] = "currency"
key_rename_map["quote_type"] = "quoteType"
key_rename_map["timezone"] = "exchangeTimezoneName"
key_rename_map["last_price"] = ["currentPrice", "regularMarketPrice"]
key_rename_map["open"] = ["open", "regularMarketOpen"]
key_rename_map["day_high"] = ["dayHigh", "regularMarketDayHigh"]
key_rename_map["day_low"] = ["dayLow", "regularMarketDayLow"]
key_rename_map["previous_close"] = ["previousClose", "regularMarketPreviousClose"]
# preMarketPrice
key_rename_map["previous_close"] = ["previousClose"]
key_rename_map["regular_market_previous_close"] = ["regularMarketPreviousClose"]
key_rename_map["fifty_day_average"] = "fiftyDayAverage"
key_rename_map["two_hundred_day_average"] = "twoHundredDayAverage"
key_rename_map["year_change"] = "52WeekChange"
key_rename_map["year_change"] = ["52WeekChange", "fiftyTwoWeekChange"]
key_rename_map["year_high"] = "fiftyTwoWeekHigh"
key_rename_map["year_low"] = "fiftyTwoWeekLow"
@@ -719,25 +739,31 @@ class TestTickerInfo(unittest.TestCase):
key_rename_map["three_month_average_volume"] = "averageVolume"
key_rename_map["market_cap"] = "marketCap"
key_rename_map["shares"] = "floatShares"
key_rename_map["timezone"] = "exchangeTimezoneName"
key_rename_map["shares"] = "sharesOutstanding"
approximate_keys = {"fifty_day_average", "ten_day_average_volume"}
approximate_keys.update({"market_cap"})
for k in list(key_rename_map.keys()):
if '_' in k:
key_rename_map[yf.utils.snake_case_2_camelCase(k)] = key_rename_map[k]
# bad_keys = []
# Note: share count items in info[] are bad. Sometimes the float > outstanding!
# So often fast_info["shares"] does not match.
# Why isn't fast_info["shares"] wrong? Because using it to calculate market cap always correct.
bad_keys = {"shares"}
# Loose tolerance for averages, no idea why don't match info[]. Is info wrong?
custom_tolerances = {}
custom_tolerances["year_change"] = 1.0
# custom_tolerances["ten_day_average_volume"] = 1e-3
custom_tolerances["ten_day_average_volume"] = 1e-1
# custom_tolerances["three_month_average_volume"] = 1e-2
custom_tolerances["three_month_average_volume"] = 5e-1
custom_tolerances["fifty_day_average"] = 1e-2
custom_tolerances["two_hundred_day_average"] = 1e-2
for k in list(custom_tolerances.keys()):
if '_' in k:
custom_tolerances[yf.utils.snake_case_2_camelCase(k)] = custom_tolerances[k]
for k in basic_info_keys:
for k in fast_info_keys:
if k in key_rename_map:
k2 = key_rename_map[k]
else:
@@ -749,11 +775,10 @@ class TestTickerInfo(unittest.TestCase):
for m in k2:
for ticker in self.tickers:
if not m in ticker.info:
print(sorted(list(ticker.info.keys())))
raise Exception("Need to add/fix mapping for basic_info key", k)
# print(f"symbol={ticker.ticker}: fast_info key '{k}' mapped to info key '{m}' but not present in info")
continue
if k in bad_keys:
# Doesn't match, investigate why
continue
if k in custom_tolerances:
@@ -762,14 +787,25 @@ class TestTickerInfo(unittest.TestCase):
rtol = 5e-3
# rtol = 1e-4
print(f"Testing key {m} -> {k} ticker={ticker.ticker}")
# if k in approximate_keys:
v1 = ticker.basic_info[k]
v2 = ticker.info[m]
if isinstance(v1, float) or isinstance(v2, int):
self.assertTrue(np.isclose(v1, v2, rtol=rtol), f"{k}: {v1} != {v2}")
else:
self.assertEqual(v1, v2, f"{k}: {v1} != {v2}")
correct = ticker.info[m]
test = ticker.fast_info[k]
# print(f"Testing: symbol={ticker.ticker} m={m} k={k}: test={test} vs correct={correct}")
if k in ["market_cap","marketCap"] and ticker.fast_info["currency"] in ["GBp", "ILA"]:
# Adjust for currency to match Yahoo:
test *= 0.01
try:
if correct is None:
self.assertTrue(test is None or (not np.isnan(test)), f"{k}: {test} must be None or real value because correct={correct}")
elif isinstance(test, float) or isinstance(correct, int):
self.assertTrue(np.isclose(test, correct, rtol=rtol), f"{ticker.ticker} {k}: {test} != {correct}")
else:
self.assertEqual(test, correct, f"{k}: {test} != {correct}")
except:
if k in ["regularMarketPreviousClose"] and ticker.ticker in ["ADS.DE"]:
# Yahoo is wrong, is returning post-market close not regular
continue
else:
raise
@@ -780,6 +816,7 @@ def suite():
suite.addTest(TestTickerHolders('Test holders'))
suite.addTest(TestTickerHistory('Test Ticker history'))
suite.addTest(TestTickerMiscFinancials('Test misc financials'))
suite.addTest(TestTickerInfo('Test info & fast_info'))
return suite

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,7 @@
import functools
from functools import lru_cache
import logging
import hashlib
from base64 import b64decode
usePycryptodome = False # slightly faster
@@ -14,6 +15,9 @@ else:
import requests as requests
import re
from bs4 import BeautifulSoup
import random
import time
from frozendict import frozendict
@@ -22,8 +26,12 @@ try:
except ImportError:
import json as json
from . import utils
cache_maxsize = 64
logger = utils.get_yf_logger()
def lru_cache_freezeargs(func):
"""
@@ -46,67 +54,38 @@ def lru_cache_freezeargs(func):
return wrapped
def decrypt_cryptojs_aes_stores(data):
def _extract_extra_keys_from_stores(data):
    """Return the distinct values stored under the extra top-level keys of ``data``.

    Every key other than "context" and "plugins" is considered. Values are
    returned in key order; when several keys hold the same value only the
    first occurrence is kept.

    :param data: mapping parsed from the page's JSON; values under the extra
        keys must be hashable (a ``TypeError`` is raised otherwise).
    :return: list of unique values, in order of first appearance.
    """
    extra_values = (v for k, v in data.items() if k not in ("context", "plugins"))
    # dict preserves insertion order, so this is an ordered de-duplication
    return list(dict.fromkeys(extra_values))
def decrypt_cryptojs_aes_stores(data, keys=None):
encrypted_stores = data['context']['dispatcher']['stores']
password = None
candidate_passwords = []
if keys is not None:
if not isinstance(keys, list):
raise TypeError("'keys' must be list")
candidate_passwords = keys
else:
candidate_passwords = []
if "_cs" in data and "_cr" in data:
_cs = data["_cs"]
_cr = data["_cr"]
_cr = b"".join(int.to_bytes(i, length=4, byteorder="big", signed=True) for i in json.loads(_cr)["words"])
password = hashlib.pbkdf2_hmac("sha1", _cs.encode("utf8"), _cr, 1, dklen=32).hex()
else:
# Currently assume one extra key in dict, which is password. Print error if
# more extra keys detected.
new_keys = [k for k in data.keys() if k not in ["context", "plugins"]]
new_keys_values = set([data[k] for k in new_keys])
# Maybe multiple keys have same value - keep one of each
new_keys2 = []
new_keys2_values = set()
for k in new_keys:
v = data[k]
if not v in new_keys2_values:
new_keys2.append(k)
new_keys2_values.add(v)
l = len(new_keys)
if l == 0:
return None
elif l == 1 and isinstance(data[new_keys[0]], str):
password_key = new_keys[0]
# else:
# msg = "Yahoo has again changed data format, yfinance now unsure which key(s) is for decryption:"
# new_keys_pretty = {}
# l = min(10, len(new_keys))
# for i in range(0, l):
# k = new_keys[i]
# k_str = k if len(k) < 32 else k[:32-3]+"..."
# v = data[k]
# v_type = type(v)
# v_str = str(v)
# if len(v_str) > 256:
# v_str = v_str[:256]+"..."
# new_keys_pretty[k_str] = f"{v_str}' ({v_type})"
# for k in new_keys_pretty:
# msg += '\n' + f"'{k}' -> '{new_keys_pretty[k]}'"
# if len(new_keys) > l:
# d = len(new_keys) - l
# msg += '\n' + "..."
# msg += '\n' + f"{d} more options!"
# raise Exception(msg)
# password_key = new_keys[0]
# password = data[password_key]
# The above attempt to smartly pick out decryption key has stopped working.
# Fortunately the keys Yahoo use are currently hardcoded in their JSON:
candidate_passwords += ["ad4d90b3c9f2e1d156ef98eadfa0ff93e4042f6960e54aa2a13f06f528e6b50ba4265a26a1fd5b9cd3db0d268a9c34e1d080592424309429a58bce4adc893c87", \
"e9a8ab8e5620b712ebc2fb4f33d5c8b9c80c0d07e8c371911c785cf674789f1747d76a909510158a7b7419e86857f2d7abbd777813ff64840e4cbc514d12bcae",
"6ae2523aeafa283dad746556540145bf603f44edbf37ad404d3766a8420bb5eb1d3738f52a227b88283cca9cae44060d5f0bba84b6a495082589f5fe7acbdc9e",
"3365117c2a368ffa5df7313a4a84988f73926a86358e8eea9497c5ff799ce27d104b68e5f2fbffa6f8f92c1fef41765a7066fa6bcf050810a9c4c7872fd3ebf0"]
# candidate_passwords += [data[k] for k in new_keys] # don't do these, none work
encrypted_stores = b64decode(encrypted_stores)
assert encrypted_stores[0:8] == b"Salted__"
@@ -187,7 +166,7 @@ def decrypt_cryptojs_aes_stores(data):
except:
pass
if not success:
raise Exception("yfinance failed to decrypt Yahoo data response with hardcoded keys, contact developers")
raise Exception("yfinance failed to decrypt Yahoo data response")
decoded_stores = json.loads(plaintext)
return decoded_stores
@@ -230,6 +209,72 @@ class TickerData:
proxy = {"https": proxy}
return proxy
def get_raw_json(self, url, user_agent_headers=None, params=None, proxy=None, timeout=30):
    """Fetch ``url`` through ``self.get`` and return the decoded JSON body.

    ``raise_for_status()`` is called on the response before decoding, so an
    error status surfaces as an exception rather than as parsed content.
    """
    request_kwargs = dict(user_agent_headers=user_agent_headers, params=params,
                          proxy=proxy, timeout=timeout)
    resp = self.get(url, **request_kwargs)
    resp.raise_for_status()
    return resp.json()
def _get_decryption_keys_from_yahoo_js(self, soup):
    """Try to derive the decryption password from the parsed Yahoo page.

    Two strategies are attempted in order:

    1. If exactly ``key_count`` top-level keys follow "plugins" in the
       ``root.App.main`` JSON, concatenate their values.
    2. Otherwise fetch the page's ``main.*.js`` script(s), look for the
       ``t["..."]`` key names used next to ``context.dispatcher.stores``,
       and concatenate the values stored under those names.

    :param soup: parsed page; must provide ``find`` and ``find_all`` as used
        below (a BeautifulSoup object in the visible caller).
    :return: a one-element list holding the concatenated password, or an
        empty list if no password could be derived.
    """
    key_count = 4

    # Locate the 'root.App.main = {...}' assignment. If the page does not have the
    # expected shape, report "no keys" so the caller can use its fallbacks.
    script_tag = soup.find("script", string=re.compile("root.App.main"))
    if script_tag is None:
        return []
    main_match = re.search(r"root.App.main\s+=\s+(\{.*\})", script_tag.text)
    if main_match is None:
        return []
    re_data = json.loads(main_match.group(1))
    re_data.pop("context", None)

    def _joined_values(keys):
        # Concatenate the values stored under `keys`; None if any value is missing/empty.
        picked = {k: re_data.get(k) for k in keys}
        if not all(picked.values()):
            return None
        return ''.join(picked.values())

    # 1) attempt to get last 4 keys after plugins
    if re_data.get("plugins"):
        key_list = list(re_data.keys())
        sub_keys = key_list[key_list.index("plugins") + 1:]
        if len(sub_keys) == key_count:
            password = _joined_values(sub_keys)
            if password is not None:
                return [password]

    # 2) attempt scan main.js file approach to get keys
    re_keys = []
    prefix = "https://s.yimg.com/uc/finance/dd-site/js/main."
    tags = [tag['src'] for tag in soup.find_all('script') if prefix in tag.get('src', '')]
    for src in tags:
        response_js = self.cache_get(src)
        try:
            if response_js.status_code != 200:
                # NOTE(review): pause kept from the original; presumably a back-off
                # before requesting the next script - confirm it is still wanted.
                time.sleep(random.randrange(10, 20))
                continue
            r_data = response_js.content.decode("utf8")
            re_list = [
                x.group() for x in re.finditer(r"context.dispatcher.stores=JSON.parse((?:.*?\r?\n?)*)toString", r_data)
            ]
            for rl in re_list:
                re_sublist = [x.group() for x in re.finditer(r"t\[\"((?:.*?\r?\n?)*)\"\]", rl)]
                if len(re_sublist) == key_count:
                    # Strip the t["..."] wrapper to leave the bare key names
                    re_keys = [sl.replace('t["', '').replace('"]', '') for sl in re_sublist]
                    break
        finally:
            # Always release the response, even if decoding/scanning raised
            response_js.close()
        if len(re_keys) == key_count:
            break

    if re_keys:
        password = _joined_values(re_keys)
        if password is not None:
            return [password]

    return []
@lru_cache_freezeargs
@lru_cache(maxsize=cache_maxsize)
def get_json_data_stores(self, sub_page: str = None, proxy=None) -> dict:
@@ -241,7 +286,8 @@ class TickerData:
else:
ticker_url = "{}/{}".format(_SCRAPE_URL_, self.ticker)
html = self.get(url=ticker_url, proxy=proxy).text
response = self.get(url=ticker_url, proxy=proxy)
html = response.text
# The actual json-data for stores is in a javascript assignment in the webpage
try:
@@ -253,7 +299,28 @@ class TickerData:
data = json.loads(json_str)
stores = decrypt_cryptojs_aes_stores(data)
# Gather decryption keys:
soup = BeautifulSoup(response.content, "html.parser")
keys = self._get_decryption_keys_from_yahoo_js(soup)
if len(keys) == 0:
msg = "No decryption keys could be extracted from JS file."
if "requests_cache" in str(type(response)):
msg += " Try flushing your 'requests_cache', probably parsing old JS."
logger.warning("%s Falling back to backup decrypt methods.", msg)
if len(keys) == 0:
keys = []
try:
extra_keys = _extract_extra_keys_from_stores(data)
keys = [''.join(extra_keys[-4:])]
except:
pass
#
keys_url = "https://github.com/ranaroussi/yfinance/raw/main/yfinance/scrapers/yahoo-keys.txt"
response_gh = self.cache_get(keys_url)
keys += response_gh.text.splitlines()
# Decrypt!
stores = decrypt_cryptojs_aes_stores(data, keys)
if stores is None:
# Maybe Yahoo returned old format, not encrypted
if "context" in data and "dispatcher" in data["context"]:

View File

@@ -21,6 +21,8 @@
from __future__ import print_function
import logging
import traceback
import time as _time
import multitasking as _multitasking
import pandas as _pd
@@ -28,10 +30,9 @@ import pandas as _pd
from . import Ticker, utils
from . import shared
def download(tickers, start=None, end=None, actions=False, threads=True, ignore_tz=False,
def download(tickers, start=None, end=None, actions=False, threads=True, ignore_tz=None,
group_by='column', auto_adjust=False, back_adjust=False, repair=False, keepna=False,
progress=True, period="max", show_errors=True, interval="1d", prepost=False,
progress=True, period="max", show_errors=None, interval="1d", prepost=False,
proxy=None, rounding=False, timeout=10):
"""Download yahoo tickers
:Parameters:
@@ -44,11 +45,13 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
Valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
Intraday data cannot extend last 60 days
start: str
Download start date string (YYYY-MM-DD) or _datetime.
Download start date string (YYYY-MM-DD) or _datetime, inclusive.
Default is 1900-01-01
E.g. for start="2020-01-01", the first data point will be on "2020-01-01"
end: str
Download end date string (YYYY-MM-DD) or _datetime.
Download end date string (YYYY-MM-DD) or _datetime, exclusive.
Default is now
E.g. for end="2023-01-01", the last data point will be on "2022-12-31"
group_by : str
Group by 'ticker' or 'column' (default)
prepost : bool
@@ -68,18 +71,35 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
How many threads to use for mass downloading. Default is True
ignore_tz: bool
When combining from different timezones, ignore that part of datetime.
Default is False
Default depends on interval. Intraday = False. Day+ = True.
proxy: str
Optional. Proxy server URL scheme. Default is None
rounding: bool
Optional. Round values to 2 decimal places?
show_errors: bool
Optional. Doesn't print errors if False
DEPRECATED, will be removed in future version
timeout: None or float
If not None stops waiting for a response after given number of
seconds. (Can also be a fraction of a second e.g. 0.01)
"""
if show_errors is not None:
if show_errors:
utils.print_once(f"yfinance: download(show_errors={show_errors}) argument is deprecated and will be removed in future version. Do this instead: logging.getLogger('yfinance').setLevel(logging.ERROR)")
logging.getLogger('yfinance').setLevel(logging.ERROR)
else:
utils.print_once(f"yfinance: download(show_errors={show_errors}) argument is deprecated and will be removed in future version. Do this instead to suppress error messages: logging.getLogger('yfinance').setLevel(logging.CRITICAL)")
logging.getLogger('yfinance').setLevel(logging.CRITICAL)
if ignore_tz is None:
# Set default value depending on interval
if interval[1:] in ['m', 'h']:
# Intraday
ignore_tz = False
else:
ignore_tz = True
# create ticker list
tickers = tickers if isinstance(
tickers, (list, set, tuple)) else tickers.replace(',', ' ').split()
@@ -104,6 +124,7 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
# reset shared._DFS
shared._DFS = {}
shared._ERRORS = {}
shared._TRACEBACKS = {}
# download using threads
if threads:
@@ -136,12 +157,31 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
if progress:
shared._PROGRESS_BAR.completed()
if shared._ERRORS and show_errors:
print('\n%.f Failed download%s:' % (
if shared._ERRORS:
logger = utils.get_yf_logger()
logger.error('\n%.f Failed download%s:' % (
len(shared._ERRORS), 's' if len(shared._ERRORS) > 1 else ''))
# print(shared._ERRORS)
print("\n".join(['- %s: %s' %
v for v in list(shared._ERRORS.items())]))
# Print each distinct error once, with list of symbols affected
errors = {}
for ticker in shared._ERRORS:
err = shared._ERRORS[ticker]
if not err in errors:
errors[err] = [ticker]
else:
errors[err].append(ticker)
for err in errors.keys():
logger.error(f'{errors[err]}: ' + err)
# Print each distinct traceback once, with list of symbols affected
tbs = {}
for ticker in shared._ERRORS:
tb = shared._TRACEBACKS[ticker]
if not tb in tbs:
tbs[tb] = [ticker]
else:
tbs[tb].append(ticker)
for tb in tbs.keys():
logger.debug(f'{tbs[tb]}: ' + tb)
if ignore_tz:
for tkr in shared._DFS.keys():
@@ -205,6 +245,7 @@ def _download_one_threaded(ticker, start=None, end=None,
keepna, timeout)
except Exception as e:
# Global try/except needed as current thread implementation breaks if exception is raised.
shared._TRACEBACKS[ticker] = traceback.format_exc()
shared._DFS[ticker] = utils.empty_df()
shared._ERRORS[ticker] = repr(e)
else:
@@ -224,5 +265,5 @@ def _download_one(ticker, start=None, end=None,
actions=actions, auto_adjust=auto_adjust,
back_adjust=back_adjust, repair=repair, proxy=proxy,
rounding=rounding, keepna=keepna, timeout=timeout,
debug=False, raise_errors=False # debug and raise_errors false to not log and raise errors in threads
raise_errors=False # stop individual threads raising errors
)

View File

@@ -58,7 +58,7 @@ class Analysis:
analysis_data = analysis_data['QuoteSummaryStore']
except KeyError as e:
err_msg = "No analysis data found, symbol may be delisted"
print('- %s: %s' % (self._data.ticker, err_msg))
logger.error('%s: %s', self._data.ticker, err_msg)
return
if isinstance(analysis_data.get('earningsTrend'), dict):

View File

@@ -1,4 +1,5 @@
import datetime
import logging
import json
import pandas as pd
@@ -8,6 +9,7 @@ from yfinance import utils
from yfinance.data import TickerData
from yfinance.exceptions import YFinanceDataException, YFinanceException
logger = utils.get_yf_logger()
class Fundamentals:
@@ -50,7 +52,7 @@ class Fundamentals:
self._fin_data_quote = self._financials_data['QuoteSummaryStore']
except KeyError:
err_msg = "No financials data found, symbol may be delisted"
print('- %s: %s' % (self._data.ticker, err_msg))
logger.error('%s: %s', self._data.ticker, err_msg)
return None
def _scrape_earnings(self, proxy):
@@ -144,7 +146,7 @@ class Financials:
if statement is not None:
return statement
except YFinanceException as e:
print(f"- {self._data.ticker}: Failed to create {name} financials table for reason: {repr(e)}")
logger.error("%s: Failed to create %s financials table for reason: %r", self._data.ticker, name, e)
return pd.DataFrame()
def _create_financials_table(self, name, timescale, proxy):
@@ -267,7 +269,7 @@ class Financials:
if statement is not None:
return statement
except YFinanceException as e:
print(f"- {self._data.ticker}: Failed to create financials table for {name} reason: {repr(e)}")
logger.error("%s: Failed to create financials table for %s reason: %r", self._data.ticker, name, e)
return pd.DataFrame()
def _create_financials_table_old(self, name, timescale, proxy):

View File

@@ -1,26 +1,29 @@
import datetime
import logging
import json
import warnings
import pandas as pd
import numpy as _np
from yfinance import utils
from yfinance.data import TickerData
logger = utils.get_yf_logger()
info_retired_keys_price = {"currentPrice", "dayHigh", "dayLow", "open", "previousClose", "volume"}
info_retired_keys_price = {"currentPrice", "dayHigh", "dayLow", "open", "previousClose", "volume", "volume24Hr"}
info_retired_keys_price.update({"regularMarket"+s for s in ["DayHigh", "DayLow", "Open", "PreviousClose", "Price", "Volume"]})
info_retired_keys_price.update({"fiftyTwoWeekLow", "fiftyTwoWeekHigh", "fiftyTwoWeekChange", "fiftyDayAverage", "twoHundredDayAverage"})
info_retired_keys_price.update({"fiftyTwoWeekLow", "fiftyTwoWeekHigh", "fiftyTwoWeekChange", "52WeekChange", "fiftyDayAverage", "twoHundredDayAverage"})
info_retired_keys_price.update({"averageDailyVolume10Day", "averageVolume10days", "averageVolume"})
info_retired_keys_exchange = {"currency", "exchange", "exchangeTimezoneName", "exchangeTimezoneShortName"}
info_retired_keys_exchange = {"currency", "exchange", "exchangeTimezoneName", "exchangeTimezoneShortName", "quoteType"}
info_retired_keys_marketCap = {"marketCap"}
info_retired_keys_symbol = {"symbol"}
info_retired_keys = info_retired_keys_price | info_retired_keys_exchange | info_retired_keys_marketCap | info_retired_keys_symbol
#
info_retired_keys = []
PRUNE_INFO = True
# PRUNE_INFO = False
_BASIC_URL_ = "https://query2.finance.yahoo.com/v10/finance/quoteSummary"
from collections.abc import MutableMapping
@@ -46,16 +49,16 @@ class InfoDictWrapper(MutableMapping):
def __getitem__(self, k):
if k in info_retired_keys_price:
print(f"Price data removed from info. Use Ticker.basic_info or history() instead")
warnings.warn(f"Price data removed from info (key='{k}'). Use Ticker.fast_info or history() instead", DeprecationWarning)
return None
elif k in info_retired_keys_exchange:
print(f"Exchange data removed from info. Use Ticker.basic_info or Ticker.get_history_metadata() instead")
warnings.warn(f"Exchange data removed from info (key='{k}'). Use Ticker.fast_info or Ticker.get_history_metadata() instead", DeprecationWarning)
return None
elif k in info_retired_keys_marketCap:
print(f"Market cap removed from info. Use Ticker.basic_info instead")
warnings.warn(f"Market cap removed from info (key='{k}'). Use Ticker.fast_info instead", DeprecationWarning)
return None
elif k in info_retired_keys_symbol:
print(f"Symbol removed from info. You know this already")
warnings.warn(f"Symbol removed from info (key='{k}'). You know this already", DeprecationWarning)
return None
return self.info[self._keytransform(k)]
@@ -75,6 +78,474 @@ class InfoDictWrapper(MutableMapping):
return k
class FastInfo:
    """Dict-like, lazily populated subset of ``info[]`` items.

    Every value is exposed as a property that is computed on first access
    from the wrapped ticker's price history and history metadata (with a
    few fallbacks to the ticker's ``info``), then cached on the instance
    once a non-None value has been obtained.

    Keys are published in camelCase through ``keys()``; ``__getitem__``
    additionally accepts the snake_case property names.
    """

    def __init__(self, tickerBaseObject):
        """Wrap *tickerBaseObject*; nothing is fetched until a value is read."""
        utils.print_once("yfinance: Note: 'Ticker.info' dict is now fixed & improved, 'fast_info' is no longer faster")

        self._tkr = tickerBaseObject

        # Cached price tables / metadata
        self._prices_1y = None
        self._prices_1wk_1h_prepost = None
        self._prices_1wk_1h_reg = None
        self._md = None

        # Cached property values
        self._currency = None
        self._quote_type = None
        self._exchange = None
        self._timezone = None

        self._shares = None
        self._mcap = None

        self._open = None
        self._day_high = None
        self._day_low = None
        self._last_price = None
        self._last_volume = None

        self._prev_close = None

        self._reg_prev_close = None

        self._50d_day_average = None
        self._200d_day_average = None
        self._year_high = None
        self._year_low = None
        self._year_change = None

        self._10d_avg_vol = None
        self._3mo_avg_vol = None

        # utils.attributes() would call every property (triggering fetches),
        # so the property names have to be hardcoded here.
        _properties = ["currency", "quote_type", "exchange", "timezone"]
        _properties += ["shares", "market_cap"]
        _properties += ["last_price", "previous_close", "open", "day_high", "day_low"]
        _properties += ["regular_market_previous_close"]
        _properties += ["last_volume"]
        _properties += ["fifty_day_average", "two_hundred_day_average", "ten_day_average_volume", "three_month_average_volume"]
        _properties += ["year_high", "year_low", "year_change"]

        # Because released before fixing key case, need to officially support
        # camel-case but also secretly support snake-case
        base_keys = [k for k in _properties if '_' not in k]
        sc_keys = [k for k in _properties if '_' in k]

        self._sc_to_cc_key = {k: utils.snake_case_2_camelCase(k) for k in sc_keys}
        self._cc_to_sc_key = {v: k for k, v in self._sc_to_cc_key.items()}

        self._public_keys = sorted(base_keys + list(self._sc_to_cc_key.values()))
        self._keys = sorted(self._public_keys + sc_keys)

    # dict imitation:
    def keys(self):
        """Return the sorted list of public (camelCase) keys."""
        return self._public_keys

    def items(self):
        """Return ``(key, value)`` pairs for every public key (computes all values)."""
        return [(k, self[k]) for k in self._public_keys]

    def values(self):
        """Return the values of every public key (computes all values)."""
        return [self[k] for k in self._public_keys]

    def get(self, key, default=None):
        """Return the value for public key *key*, or *default* if it is not a public key."""
        if key in self.keys():
            if key in self._cc_to_sc_key:
                key = self._cc_to_sc_key[key]
            return self[key]
        return default

    def __getitem__(self, k):
        """Look up *k* (camelCase or snake_case); raise KeyError for anything else."""
        if not isinstance(k, str):
            raise KeyError("key must be a string")
        if k not in self._keys:
            raise KeyError(f"'{k}' not valid key. Examine 'FastInfo.keys()'")
        if k in self._cc_to_sc_key:
            k = self._cc_to_sc_key[k]
        return getattr(self, k)

    def __contains__(self, k):
        return k in self.keys()

    def __iter__(self):
        return iter(self.keys())

    def __str__(self):
        return "lazy-loading dict with keys = " + str(self.keys())

    def __repr__(self):
        return self.__str__()

    def toJSON(self, indent=4):
        """Serialise all public keys and their values to a JSON string."""
        # The module imports 'json' (not '_json'); the old '_json' reference
        # raised NameError on every call.
        return json.dumps({k: self[k] for k in self.keys()}, indent=indent)

    def _fetch_history_quietly(self, **kwargs):
        """Call the ticker's ``history(**kwargs)`` with error logging suppressed.

        The logger level is always restored, even if ``history()`` raises,
        so a failed fetch cannot leave the logger silenced.
        """
        previous_level = logger.level
        logger.setLevel(logging.CRITICAL)
        try:
            return self._tkr.history(**kwargs)
        finally:
            logger.setLevel(previous_level)

    def _get_1y_prices(self, fullDaysOnly=False):
        """Return roughly the last year of daily prices (fetched once and cached).

        With ``fullDaysOnly=True`` today's row is excluded while
        ``_exchange_open_now()`` reports the exchange as open.
        """
        if self._prices_1y is None:
            self._prices_1y = self._fetch_history_quietly(period="380d", auto_adjust=False, keepna=True)
            self._md = self._tkr.get_history_metadata()
            try:
                ctp = self._md["currentTradingPeriod"]
                self._today_open = pd.to_datetime(ctp["regular"]["start"], unit='s', utc=True).tz_convert(self.timezone)
                self._today_close = pd.to_datetime(ctp["regular"]["end"], unit='s', utc=True).tz_convert(self.timezone)
                self._today_midnight = self._today_close.ceil("D")
            except Exception:
                # Leave the attributes defined before propagating the error.
                self._today_open = None
                self._today_close = None
                self._today_midnight = None
                raise

        if self._prices_1y.empty:
            return self._prices_1y

        dnow = pd.Timestamp.utcnow().tz_convert(self.timezone).date()
        d1 = dnow
        d0 = (d1 + datetime.timedelta(days=1)) - utils._interval_to_timedelta("1y")
        if fullDaysOnly and self._exchange_open_now():
            # Exclude today
            d1 -= utils._interval_to_timedelta("1d")
        return self._prices_1y.loc[str(d0):str(d1)]

    def _get_1wk_1h_prepost_prices(self):
        """Return one week of hourly prices including pre/post market (cached)."""
        if self._prices_1wk_1h_prepost is None:
            self._prices_1wk_1h_prepost = self._fetch_history_quietly(
                period="1wk", interval="1h", auto_adjust=False, prepost=True)
        return self._prices_1wk_1h_prepost

    def _get_1wk_1h_reg_prices(self):
        """Return one week of hourly regular-hours prices (cached)."""
        if self._prices_1wk_1h_reg is None:
            self._prices_1wk_1h_reg = self._fetch_history_quietly(
                period="1wk", interval="1h", auto_adjust=False, prepost=False)
        return self._prices_1wk_1h_reg

    def _get_exchange_metadata(self):
        """Return the ticker's history metadata, fetching prices first if needed."""
        if self._md is not None:
            return self._md

        self._get_1y_prices()
        self._md = self._tkr.get_history_metadata()
        return self._md

    def _exchange_open_now(self):
        """Heuristic: is "now" before the end of the last priced day (+20 minutes)?

        Returns False when there are no prices to derive the cutoff from
        (previously this case raised IndexError).
        """
        t = pd.Timestamp.utcnow()
        self._get_exchange_metadata()

        prices = self._get_1y_prices()
        if prices.empty:
            return False

        last_day_cutoff = prices.index[-1] + datetime.timedelta(days=1)
        last_day_cutoff += datetime.timedelta(minutes=20)
        return t < last_day_cutoff

    @property
    def currency(self):
        if self._currency is not None:
            return self._currency

        if self._tkr._history_metadata is None:
            self._get_1y_prices()
        md = self._tkr.get_history_metadata()
        self._currency = md["currency"]
        return self._currency

    @property
    def quote_type(self):
        if self._quote_type is not None:
            return self._quote_type

        if self._tkr._history_metadata is None:
            self._get_1y_prices()
        md = self._tkr.get_history_metadata()
        self._quote_type = md["instrumentType"]
        return self._quote_type

    @property
    def exchange(self):
        if self._exchange is not None:
            return self._exchange

        self._exchange = self._get_exchange_metadata()["exchangeName"]
        return self._exchange

    @property
    def timezone(self):
        if self._timezone is not None:
            return self._timezone

        self._timezone = self._get_exchange_metadata()["exchangeTimezoneName"]
        return self._timezone

    @property
    def shares(self):
        if self._shares is not None:
            return self._shares

        shares = self._tkr.get_shares_full(start=pd.Timestamp.utcnow().date()-pd.Timedelta(days=548))
        if shares is None:
            # Requesting 18 months failed, so fallback to shares which should include last year
            shares = self._tkr.get_shares()
        if shares is not None:
            if isinstance(shares, pd.DataFrame):
                shares = shares[shares.columns[0]]
            self._shares = int(shares.iloc[-1])
        return self._shares

    @property
    def last_price(self):
        if self._last_price is not None:
            return self._last_price

        prices = self._get_1y_prices()
        if prices.empty:
            md = self._get_exchange_metadata()
            if "regularMarketPrice" in md:
                self._last_price = md["regularMarketPrice"]
        else:
            self._last_price = float(prices["Close"].iloc[-1])
            if _np.isnan(self._last_price):
                # Last close missing: fall back to metadata if it has a price
                md = self._get_exchange_metadata()
                if "regularMarketPrice" in md:
                    self._last_price = md["regularMarketPrice"]
        return self._last_price

    @property
    def previous_close(self):
        if self._prev_close is not None:
            return self._prev_close

        prices = self._get_1wk_1h_prepost_prices()
        fail = False
        if prices.empty:
            fail = True
        else:
            prices = prices[["Close"]].groupby(prices.index.date).last()
            if prices.shape[0] < 2:
                # Very few symbols have previousClose despite
                # no trading data e.g. 'QCSTIX'.
                fail = True
            else:
                self._prev_close = float(prices["Close"].iloc[-2])
        if fail:
            # Fallback to original info[] if available.
            self._tkr.info  # trigger fetch
            k = "previousClose"
            if self._tkr._quote._retired_info is not None and k in self._tkr._quote._retired_info:
                self._prev_close = self._tkr._quote._retired_info[k]
        return self._prev_close

    @property
    def regular_market_previous_close(self):
        if self._reg_prev_close is not None:
            return self._reg_prev_close

        prices = self._get_1y_prices()
        if prices.shape[0] == 1:
            # Tiny % of tickers don't return daily history before last trading day,
            # so backup option is hourly history:
            prices = self._get_1wk_1h_reg_prices()
            prices = prices[["Close"]].groupby(prices.index.date).last()
        if prices.shape[0] < 2:
            # Very few symbols have regularMarketPreviousClose despite
            # no trading data. E.g. 'QCSTIX'.
            # So fallback to original info[] if available.
            self._tkr.info  # trigger fetch
            k = "regularMarketPreviousClose"
            if self._tkr._quote._retired_info is not None and k in self._tkr._quote._retired_info:
                self._reg_prev_close = self._tkr._quote._retired_info[k]
        else:
            self._reg_prev_close = float(prices["Close"].iloc[-2])
        return self._reg_prev_close

    @property
    def open(self):
        if self._open is not None:
            return self._open

        prices = self._get_1y_prices()
        if prices.empty:
            self._open = None
        else:
            self._open = float(prices["Open"].iloc[-1])
            if _np.isnan(self._open):
                self._open = None
        return self._open

    @property
    def day_high(self):
        if self._day_high is not None:
            return self._day_high

        prices = self._get_1y_prices()
        if prices.empty:
            self._day_high = None
        else:
            self._day_high = float(prices["High"].iloc[-1])
            if _np.isnan(self._day_high):
                self._day_high = None
        return self._day_high

    @property
    def day_low(self):
        if self._day_low is not None:
            return self._day_low

        prices = self._get_1y_prices()
        if prices.empty:
            self._day_low = None
        else:
            self._day_low = float(prices["Low"].iloc[-1])
            if _np.isnan(self._day_low):
                self._day_low = None
        return self._day_low

    @property
    def last_volume(self):
        if self._last_volume is not None:
            return self._last_volume

        prices = self._get_1y_prices()
        self._last_volume = None if prices.empty else int(prices["Volume"].iloc[-1])
        return self._last_volume

    @property
    def fifty_day_average(self):
        if self._50d_day_average is not None:
            return self._50d_day_average

        prices = self._get_1y_prices(fullDaysOnly=True)
        if prices.empty:
            self._50d_day_average = None
        else:
            # Mean close of the last 50 rows (or all rows if fewer)
            first = max(prices.shape[0] - 50, 0)
            self._50d_day_average = float(prices["Close"].iloc[first:].mean())
        return self._50d_day_average

    @property
    def two_hundred_day_average(self):
        if self._200d_day_average is not None:
            return self._200d_day_average

        prices = self._get_1y_prices(fullDaysOnly=True)
        if prices.empty:
            self._200d_day_average = None
        else:
            # Mean close of the last 200 rows (or all rows if fewer)
            first = max(prices.shape[0] - 200, 0)
            self._200d_day_average = float(prices["Close"].iloc[first:].mean())
        return self._200d_day_average

    @property
    def ten_day_average_volume(self):
        if self._10d_avg_vol is not None:
            return self._10d_avg_vol

        prices = self._get_1y_prices(fullDaysOnly=True)
        if prices.empty:
            self._10d_avg_vol = None
        else:
            # Mean volume of the last 10 rows (or all rows if fewer)
            first = max(prices.shape[0] - 10, 0)
            self._10d_avg_vol = int(prices["Volume"].iloc[first:].mean())
        return self._10d_avg_vol

    @property
    def three_month_average_volume(self):
        if self._3mo_avg_vol is not None:
            return self._3mo_avg_vol

        prices = self._get_1y_prices(fullDaysOnly=True)
        if prices.empty:
            self._3mo_avg_vol = None
        else:
            dt1 = prices.index[-1]
            dt0 = dt1 - utils._interval_to_timedelta("3mo") + utils._interval_to_timedelta("1d")
            self._3mo_avg_vol = int(prices.loc[dt0:dt1, "Volume"].mean())
        return self._3mo_avg_vol

    @property
    def year_high(self):
        if self._year_high is not None:
            return self._year_high

        prices = self._get_1y_prices(fullDaysOnly=True)
        if prices.empty:
            prices = self._get_1y_prices(fullDaysOnly=False)
        self._year_high = float(prices["High"].max())
        return self._year_high

    @property
    def year_low(self):
        if self._year_low is not None:
            return self._year_low

        prices = self._get_1y_prices(fullDaysOnly=True)
        if prices.empty:
            prices = self._get_1y_prices(fullDaysOnly=False)
        self._year_low = float(prices["Low"].min())
        return self._year_low

    @property
    def year_change(self):
        if self._year_change is not None:
            return self._year_change

        prices = self._get_1y_prices(fullDaysOnly=True)
        if prices.shape[0] >= 2:
            first_close = prices["Close"].iloc[0]
            self._year_change = float((prices["Close"].iloc[-1] - first_close) / first_close)
        return self._year_change

    @property
    def market_cap(self):
        if self._mcap is not None:
            return self._mcap

        try:
            shares = self.shares
        except Exception as e:
            if "Cannot retrieve share count" in str(e):
                shares = None
            else:
                raise

        if shares is None:
            # Very few symbols have marketCap despite no share count.
            # E.g. 'BTC-USD'
            # So fallback to original info[] if available.
            self._tkr.info  # trigger fetch
            k = "marketCap"
            if self._tkr._quote._retired_info is not None and k in self._tkr._quote._retired_info:
                self._mcap = self._tkr._quote._retired_info[k]
        else:
            self._mcap = float(shares * self.last_price)
        return self._mcap
class Quote:
@@ -83,18 +554,22 @@ class Quote:
self.proxy = proxy
self._info = None
self._retired_info = None
self._sustainability = None
self._recommendations = None
self._calendar = None
self._already_scraped = False
self._already_scraped_complementary = False
self._already_fetched = False
self._already_fetched_complementary = False
@property
def info(self) -> dict:
if self._info is None:
self._scrape(self.proxy)
self._scrape_complementary(self.proxy)
# self._scrape(self.proxy) # decrypt broken
self._fetch(self.proxy)
self._fetch_complementary(self.proxy)
return self._info
@@ -127,7 +602,7 @@ class Quote:
quote_summary_store = json_data['QuoteSummaryStore']
except KeyError:
err_msg = "No summary info found, symbol may be delisted"
print('- %s: %s' % (self._data.ticker, err_msg))
logger.error('%s: %s', self._data.ticker, err_msg)
return None
# sustainability
@@ -201,10 +676,14 @@ class Quote:
# Delete redundant info[] keys, because values can be accessed faster
# elsewhere - e.g. price keys. Hope is this reduces Yahoo spam effect.
if PRUNE_INFO:
for k in info_retired_keys:
if k in self._info:
# But record the dropped keys, because in rare cases they are needed.
self._retired_info = {}
for k in info_retired_keys:
if k in self._info:
self._retired_info[k] = self._info[k]
if PRUNE_INFO:
del self._info[k]
if PRUNE_INFO:
# InfoDictWrapper will explain how to access above data elsewhere
self._info = InfoDictWrapper(self._info)
@@ -233,12 +712,56 @@ class Quote:
except Exception:
pass
def _scrape_complementary(self, proxy):
if self._already_scraped_complementary:
def _fetch(self, proxy):
if self._already_fetched:
return
self._already_scraped_complementary = True
self._already_fetched = True
modules = ['summaryProfile', 'financialData', 'quoteType',
'defaultKeyStatistics', 'assetProfile', 'summaryDetail']
result = self._data.get_raw_json(
_BASIC_URL_ + f"/{self._data.ticker}", params={"modules": ",".join(modules), "ssl": "true"}, proxy=proxy
)
result["quoteSummary"]["result"][0]["symbol"] = self._data.ticker
query1_info = next(
(info for info in result.get("quoteSummary", {}).get("result", []) if info["symbol"] == self._data.ticker),
None,
)
# Most keys that appear in multiple dicts have same value. Except 'maxAge' because
# Yahoo not consistent with days vs seconds. Fix it here:
for k in query1_info:
if "maxAge" in query1_info[k] and query1_info[k]["maxAge"] == 1:
query1_info[k]["maxAge"] = 86400
query1_info = {
k1: v1
for k, v in query1_info.items()
if isinstance(v, dict)
for k1, v1 in v.items()
if v1
}
# recursively format but only because of 'companyOfficers'
def _format(k, v):
if isinstance(v, dict) and "raw" in v and "fmt" in v:
v2 = v["fmt"] if k in {"regularMarketTime", "postMarketTime"} else v["raw"]
elif isinstance(v, list):
v2 = [_format(None, x) for x in v]
elif isinstance(v, dict):
v2 = {k:_format(k, x) for k, x in v.items()}
elif isinstance(v, str):
v2 = v.replace("\xa0", " ")
else:
v2 = v
return v2
for k, v in query1_info.items():
query1_info[k] = _format(k, v)
self._info = query1_info
self._scrape(proxy)
def _fetch_complementary(self, proxy):
if self._already_fetched_complementary:
return
self._already_fetched_complementary = True
# self._scrape(proxy) # decrypt broken
self._fetch(proxy)
if self._info is None:
return
@@ -280,11 +803,14 @@ class Quote:
json_str = self._data.cache_get(url=url, proxy=proxy).text
json_data = json.loads(json_str)
key_stats = json_data["timeseries"]["result"][0]
if k not in key_stats:
# Yahoo website prints N/A, indicates Yahoo lacks necessary data to calculate
try:
key_stats = json_data["timeseries"]["result"][0]
if k not in key_stats:
# Yahoo website prints N/A, indicates Yahoo lacks necessary data to calculate
v = None
else:
# Select most recent (last) raw value in list:
v = key_stats[k][-1]["reportedValue"]["raw"]
except Exception:
v = None
else:
# Select most recent (last) raw value in list:
v = key_stats[k][-1]["reportedValue"]["raw"]
self._info[k] = v

View File

@@ -0,0 +1,8 @@
daf93e37cbf219cd4c1f3f74ec4551265ec5565b99e8c9322dccd6872941cf13c818cbb88cba6f530e643b4e2329b17ec7161f4502ce6a02bb0dbbe5fc0d0474
ad4d90b3c9f2e1d156ef98eadfa0ff93e4042f6960e54aa2a13f06f528e6b50ba4265a26a1fd5b9cd3db0d268a9c34e1d080592424309429a58bce4adc893c87
e9a8ab8e5620b712ebc2fb4f33d5c8b9c80c0d07e8c371911c785cf674789f1747d76a909510158a7b7419e86857f2d7abbd777813ff64840e4cbc514d12bcae
6ae2523aeafa283dad746556540145bf603f44edbf37ad404d3766a8420bb5eb1d3738f52a227b88283cca9cae44060d5f0bba84b6a495082589f5fe7acbdc9e
3365117c2a368ffa5df7313a4a84988f73926a86358e8eea9497c5ff799ce27d104b68e5f2fbffa6f8f92c1fef41765a7066fa6bcf050810a9c4c7872fd3ebf0
15d8f57919857d5a5358d2082c7ef0f1129cfacd2a6480333dcfb954b7bb67d820abefebfdb0eaa6ef18a1c57f617b67d7e7b0ec040403b889630ae5db5a4dbb
db9630d707a7d0953ac795cd8db1ca9ca6c9d8239197cdfda24b4e0ec9c37eaec4db82dab68b8f606ab7b5b4af3e65dab50606f8cf508269ec927e6ee605fb78
3c895fb5ddcc37d20d3073ed74ee3efad59bcb147c8e80fd279f83701b74b092d503dcd399604c6d8be8f3013429d3c2c76ed5b31b80c9df92d5eab6d3339fce

View File

@@ -22,4 +22,5 @@
_DFS = {}
_PROGRESS_BAR = None
_ERRORS = {}
_TRACEBACKS = {}
_ISINS = {}

View File

@@ -133,10 +133,6 @@ class Ticker(TickerBase):
def shares(self) -> _pd.DataFrame :
return self.get_shares()
@property
def market_cap(self) -> float:
return self.calc_market_cap()
@property
def info(self) -> dict:
return self.get_info()

View File

@@ -87,10 +87,4 @@ class Tickers:
return data
def news(self):
collection = {}
for ticker in self.symbols:
collection[ticker] = []
items = Ticker(ticker).news
for item in items:
collection[ticker].append(item)
return collection
return {ticker: [item for item in Ticker(ticker).news] for ticker in self.symbols}

View File

@@ -35,6 +35,8 @@ import os as _os
import appdirs as _ad
import sqlite3 as _sqlite3
import atexit as _atexit
from functools import lru_cache
import logging
from threading import Lock
@@ -61,6 +63,27 @@ def attributes(obj):
if name[0] != '_' and name not in disallowed_names and hasattr(obj, name)}
@lru_cache(maxsize=None)
def print_once(msg):
    """Print *msg* the first time it is seen; later calls with the same text are no-ops.

    'warnings' module suppression of repeat messages does not work, so this
    function replicates the intended behaviour. The cache is unbounded on
    purpose: with a bounded cache a message is printed again once enough
    other distinct messages have evicted it.
    """
    print(msg)
yf_logger = None


def get_yf_logger():
    """Return the shared 'yfinance' logger, creating it on first use.

    On creation, a stream handler with a '<LEVEL> <message>' format is
    attached unless the logger already has a handler.
    """
    global yf_logger
    if yf_logger is not None:
        return yf_logger

    yf_logger = logging.getLogger("yfinance")
    if not yf_logger.handlers:
        # Add stream handler if user not already added one
        stream_handler = logging.StreamHandler()
        stream_handler.setFormatter(logging.Formatter(fmt='%(levelname)s %(message)s'))
        yf_logger.addHandler(stream_handler)
    return yf_logger
def is_isin(string):
return bool(_re.match("^([A-Z]{2})([A-Z0-9]{9})([0-9]{1})$", string))
@@ -300,6 +323,11 @@ def camel2title(strings: List[str], sep: str = ' ', acronyms: Optional[List[str]
return strings
def snake_case_2_camelCase(s):
    """Convert a snake_case string to camelCase.

    The first segment is kept as-is; each following segment gets its first
    character upper-cased and the rest lower-cased. ``str.capitalize`` is
    used instead of ``str.title`` so that letters following a digit inside
    a segment are not upper-cased (e.g. '2nd' stays '2nd', not '2Nd').
    """
    head, *tail = s.split('_')
    return head + ''.join(part.capitalize() for part in tail)
def _parse_user_dt(dt, exchange_tz):
if isinstance(dt, int):
# Should already be epoch, test with conversion:
@@ -333,10 +361,10 @@ def _interval_to_timedelta(interval):
def auto_adjust(data):
col_order = data.columns
df = data.copy()
ratio = df["Close"] / df["Adj Close"]
df["Adj Open"] = df["Open"] / ratio
df["Adj High"] = df["High"] / ratio
df["Adj Low"] = df["Low"] / ratio
ratio = (df["Adj Close"] / df["Close"]).to_numpy()
df["Adj Open"] = df["Open"] * ratio
df["Adj High"] = df["High"] * ratio
df["Adj Low"] = df["Low"] * ratio
df.drop(
["Open", "High", "Low", "Close"],
@@ -399,12 +427,9 @@ def parse_quotes(data):
def parse_actions(data):
dividends = _pd.DataFrame(
columns=["Dividends"], index=_pd.DatetimeIndex([]))
capital_gains = _pd.DataFrame(
columns=["Capital Gains"], index=_pd.DatetimeIndex([]))
splits = _pd.DataFrame(
columns=["Stock Splits"], index=_pd.DatetimeIndex([]))
dividends = None
capital_gains = None
splits = None
if "events" in data:
if "dividends" in data["events"]:
@@ -433,6 +458,16 @@ def parse_actions(data):
splits["denominator"]
splits = splits[["Stock Splits"]]
if dividends is None:
dividends = _pd.DataFrame(
columns=["Dividends"], index=_pd.DatetimeIndex([]))
if capital_gains is None:
capital_gains = _pd.DataFrame(
columns=["Capital Gains"], index=_pd.DatetimeIndex([]))
if splits is None:
splits = _pd.DataFrame(
columns=["Stock Splits"], index=_pd.DatetimeIndex([]))
return dividends, splits, capital_gains
@@ -443,6 +478,34 @@ def set_df_tz(df, interval, tz):
return df
def fix_Yahoo_returning_prepost_unrequested(quotes, interval, tradingPeriods):
    """Drop rows of *quotes* that fall outside that day's regular trading period.

    Sometimes Yahoo returns post-market data despite not requesting it.
    Normally happens on half-day early closes.
    And sometimes returns pre-market data despite not requesting it.
    E.g. some London tickers.

    Parameters
    ----------
    quotes : DataFrame indexed by timestamp.
    interval : unused here; kept for interface compatibility.
    tradingPeriods : DataFrame with "start" and "end" columns, one row per
        day, whose index provides the day (via ``index.date``).

    Returns
    -------
    A new DataFrame with the same columns as *quotes*; rows with timestamp
    ``< start`` or ``>= end`` of the matching day are removed. Rows whose
    day has no trading period are kept. The caller's *quotes* frame is not
    modified (previously a "_date" helper column was added to it in place).
    """
    # Only "start"/"end" are needed; joining explicitly on the helper date
    # column keeps any other tradingPeriods columns out of the result.
    tps_df = tradingPeriods[["start", "end"]].copy()
    tps_df["_date"] = tps_df.index.date

    merged = quotes.assign(_date=quotes.index.date).merge(tps_df, how="left", on="_date")
    # merge() resets the index; a left join keeps row order, so restore it.
    merged.index = quotes.index

    # "end" = end of regular trading hours (including any auction)
    f_drop = merged.index >= merged["end"]
    f_drop = f_drop | (merged.index < merged["start"])
    if f_drop.any():
        merged = merged[~f_drop]
    return merged.drop(["_date", "start", "end"], axis=1)
def fix_Yahoo_returning_live_separate(quotes, interval, tz_exchange):
# Yahoo bug fix. If market is open today then Yahoo normally returns
# todays data as a separate row from rest-of week/month interval in above row.
@@ -477,16 +540,24 @@ def fix_Yahoo_returning_live_separate(quotes, interval, tz_exchange):
# Last two rows are within same interval
idx1 = quotes.index[n - 1]
idx2 = quotes.index[n - 2]
if idx1 == idx2:
# Yahoo returning last interval duplicated, which means
# Yahoo is not returning live data (phew!)
return quotes
if _np.isnan(quotes.loc[idx2, "Open"]):
quotes.loc[idx2, "Open"] = quotes["Open"][n - 1]
# Note: nanmax() & nanmin() ignores NaNs
quotes.loc[idx2, "High"] = _np.nanmax([quotes["High"][n - 1], quotes["High"][n - 2]])
quotes.loc[idx2, "Low"] = _np.nanmin([quotes["Low"][n - 1], quotes["Low"][n - 2]])
# Note: nanmax() & nanmin() ignores NaNs, but still need to check not all are NaN to avoid warnings
if not _np.isnan(quotes["High"][n - 1]):
quotes.loc[idx2, "High"] = _np.nanmax([quotes["High"][n - 1], quotes["High"][n - 2]])
if "Adj High" in quotes.columns:
quotes.loc[idx2, "Adj High"] = _np.nanmax([quotes["Adj High"][n - 1], quotes["Adj High"][n - 2]])
if not _np.isnan(quotes["Low"][n - 1]):
quotes.loc[idx2, "Low"] = _np.nanmin([quotes["Low"][n - 1], quotes["Low"][n - 2]])
if "Adj Low" in quotes.columns:
quotes.loc[idx2, "Adj Low"] = _np.nanmin([quotes["Adj Low"][n - 1], quotes["Adj Low"][n - 2]])
quotes.loc[idx2, "Close"] = quotes["Close"][n - 1]
if "Adj High" in quotes.columns:
quotes.loc[idx2, "Adj High"] = _np.nanmax([quotes["Adj High"][n - 1], quotes["Adj High"][n - 2]])
if "Adj Low" in quotes.columns:
quotes.loc[idx2, "Adj Low"] = _np.nanmin([quotes["Adj Low"][n - 1], quotes["Adj Low"][n - 2]])
if "Adj Close" in quotes.columns:
quotes.loc[idx2, "Adj Close"] = quotes["Adj Close"][n - 1]
quotes.loc[idx2, "Volume"] += quotes["Volume"][n - 1]
@@ -518,7 +589,7 @@ def safe_merge_dfs(df_main, df_sub, interval):
df["_NewIndex"] = new_index
# Duplicates present within periods but can aggregate
if data_col_name == "Dividends":
if data_col_name in ["Dividends", "Capital Gains"]:
# Add
df = df.groupby("_NewIndex").sum()
df.index.name = None
@@ -656,6 +727,65 @@ def is_valid_timezone(tz: str) -> bool:
return True
def format_history_metadata(md, tradingPeriodsOnly=True):
    """Convert epoch timestamps in history metadata to exchange-timezone datetimes.

    *md* is modified in place and also returned. Non-dict or empty input is
    returned untouched.

    - "tradingPeriods" (list = regular hours only, dict = with pre/post) is
      replaced by a DataFrame indexed by "Date", with timezone-aware
      start/end columns.
    - Unless *tradingPeriodsOnly* is True, "firstTradeDate",
      "regularMarketTime" and the start/end of each "currentTradingPeriod"
      entry are converted too, and their "gmtoffset"/"timezone" keys removed.

    Entries that are already formatted are left alone, so the function can
    safely be applied again to the same metadata (previously an
    already-converted "tradingPeriods" DataFrame was compared against a
    dict, which pandas cannot reduce to a single truth value).
    """
    if not isinstance(md, dict):
        return md
    if len(md) == 0:
        return md

    tz = md["exchangeTimezoneName"]

    def _to_exchange_tz(epochs):
        # Epoch-seconds column -> tz-aware datetimes in the exchange timezone
        return _pd.to_datetime(epochs, unit='s', utc=True).dt.tz_convert(tz)

    if not tradingPeriodsOnly:
        for k in ["firstTradeDate", "regularMarketTime"]:
            if k in md and md[k] is not None:
                if isinstance(md[k], int):
                    md[k] = _pd.to_datetime(md[k], unit='s', utc=True).tz_convert(tz)

        if "currentTradingPeriod" in md:
            for m in ["regular", "pre", "post"]:
                period = md["currentTradingPeriod"].get(m)
                # An int 'start' means this entry has not been formatted yet
                if period is not None and isinstance(period["start"], int):
                    for t in ["start", "end"]:
                        period[t] = _pd.to_datetime(period[t], unit='s', utc=True).tz_convert(tz)
                    period.pop("gmtoffset", None)
                    period.pop("timezone", None)

    if "tradingPeriods" in md:
        tps = md["tradingPeriods"]
        if isinstance(tps, _pd.DataFrame):
            # Already formatted by an earlier call
            pass
        elif tps == {"pre": [], "post": []}:
            # Ignore
            pass
        elif isinstance(tps, (list, dict)):
            if isinstance(tps, list):
                # Only regular times
                df = _pd.DataFrame.from_records(_np.hstack(tps))
                df = df.drop(["timezone", "gmtoffset"], axis=1)
                df["start"] = _to_exchange_tz(df["start"])
                df["end"] = _to_exchange_tz(df["end"])
            else:
                # Includes pre- and post-market
                pre_df = _pd.DataFrame.from_records(_np.hstack(tps["pre"]))
                post_df = _pd.DataFrame.from_records(_np.hstack(tps["post"]))
                regular_df = _pd.DataFrame.from_records(_np.hstack(tps["regular"]))

                pre_df = pre_df.rename(columns={"start": "pre_start", "end": "pre_end"}).drop(["timezone", "gmtoffset"], axis=1)
                post_df = post_df.rename(columns={"start": "post_start", "end": "post_end"}).drop(["timezone", "gmtoffset"], axis=1)
                regular_df = regular_df.drop(["timezone", "gmtoffset"], axis=1)

                cols = ["pre_start", "pre_end", "start", "end", "post_start", "post_end"]
                # Row-wise (positional) join of the three per-day tables
                df = regular_df.join(pre_df).join(post_df)
                for c in cols:
                    df[c] = _to_exchange_tz(df[c])
                df = df[cols]

            # Index each row by the local date of its regular-session start
            df.index = _pd.to_datetime(df["start"].dt.date)
            df.index = df.index.tz_localize(tz)
            df.index.name = "Date"

            md["tradingPeriods"] = df

    return md
class ProgressBar:
def __init__(self, iterations, text='completed'):
self.text = text
@@ -718,7 +848,14 @@ class _KVStore:
with self._cache_mutex:
self.conn = _sqlite3.connect(filename, timeout=10, check_same_thread=False)
self.conn.execute('pragma journal_mode=wal')
self.conn.execute('create table if not exists "kv" (key TEXT primary key, value TEXT) without rowid')
try:
self.conn.execute('create table if not exists "kv" (key TEXT primary key, value TEXT) without rowid')
except Exception as e:
if 'near "without": syntax error' in str(e):
# "without rowid" requires sqlite 3.8.2. Older versions will raise exception
self.conn.execute('create table if not exists "kv" (key TEXT primary key, value TEXT)')
else:
raise
self.conn.commit()
_atexit.register(self.close)
@@ -806,6 +943,8 @@ class _TzCache:
df = _pd.read_csv(old_cache_file_path, index_col="Ticker")
except _pd.errors.EmptyDataError:
_os.remove(old_cache_file_path)
except TypeError:
_os.remove(old_cache_file_path)
else:
self.tz_db.bulk_set(df.to_dict()['Tz'])
_os.remove(old_cache_file_path)
@@ -838,9 +977,10 @@ def get_tz_cache():
try:
_tz_cache = _TzCache()
except _TzCacheException as err:
print("Failed to create TzCache, reason: {}".format(err))
print("TzCache will not be used.")
print("Tip: You can direct cache to use a different location with 'set_tz_cache_location(mylocation)'")
logger.error("Failed to create TzCache, reason: %s. "
"TzCache will not be used. "
"Tip: You can direct cache to use a different location with 'set_tz_cache_location(mylocation)'",
err)
_tz_cache = _TzCacheDummy()
return _tz_cache

View File

@@ -1 +1 @@
version = "0.2.6"
version = "0.2.19b1"