Compare commits

...

351 Commits

Author SHA1 Message Date
Value Raider
ca8c1c8cb4 Bump version to 0.2.12 2023-02-16 12:01:25 +00:00
ValueRaider
6b8b0d5c86 Merge pull request #1422 from ranaroussi/hotfix/disable-decrypt-fail-msg
Disable annoying 'backup decrypt' msg
2023-02-16 12:00:16 +00:00
Value Raider
952a04338f Disable annoying 'backup decrypt' msg 2023-02-15 16:46:55 +00:00
ValueRaider
62a442bd15 Update yahoo-keys.txt 2023-02-14 00:06:06 +00:00
ValueRaider
e96f4f3cc0 Update yahoo-keys.txt 2023-02-12 09:57:25 +00:00
ValueRaider
cd5d0dfc3b Bump version to 0.2.11 2023-02-10 16:59:20 +00:00
ValueRaider
ece41cdb06 Merge pull request #1411 from sdeibel/main
Fix format_history_metadata for some symbols
2023-02-10 16:30:03 +00:00
ValueRaider
c362d54b1a Fix other metadata accesses + tests 2023-02-09 19:41:50 +00:00
Stephan Deibel
543e4fe582 Fix format_history_metadata for some symbols
Fix format_history_metadata when firstTradeDate is None, as is the case for QCSTIX and probably others.
2023-02-09 13:46:52 -05:00
ValueRaider
53fca7016e Bump version to 0.2.10 2023-02-07 22:05:17 +00:00
ValueRaider
4b6529c3a5 Merge pull request #1406 from ranaroussi/dev
dev -> main
2023-02-07 22:03:20 +00:00
ValueRaider
8957147926 Merge branch 'main' into dev 2023-02-07 22:02:46 +00:00
ValueRaider
4c7392ed17 Merge pull request #1403 from ranaroussi/fix/decrypt-keys
Fix decrypt keys
2023-02-07 21:55:33 +00:00
ValueRaider
508de4aefb Dev version 0.2.10b3 2023-02-07 14:09:08 +00:00
ValueRaider
3d39992280 Add resilience to price repair
When calibrating price repair, use a weighted average to estimate the stock split ratio, which is more resilient
2023-02-07 14:07:08 +00:00
ValueRaider
b462836540 Merge pull request #1385 from ranaroussi/fix/download-tz-behaviour
Restore original download() timezone handling
2023-02-07 13:16:03 +00:00
ValueRaider
2795660c28 Add a 5th backup key 2023-02-07 13:10:03 +00:00
ValueRaider
3dc87753ea Fix _get_decryption_keys_from_yahoo_js() returning '' 2023-02-07 13:09:49 +00:00
ValueRaider
645cc19037 Merge pull request #1379 from ranaroussi/feature/improve-decrypt
Add another backup decrypt option
2023-02-06 22:24:22 +00:00
ValueRaider
86d6acccf7 Fix dumb bugs in price repair - 1 more 2023-02-05 18:17:47 +00:00
ValueRaider
4fa32a98ed Merge pull request #1397 from Matt-Seath/dev
Catch TypeError Exception
2023-02-05 13:49:48 +00:00
Matt Seath
35f4071c0b Catch TypeError Exception
Addresses recent issue where calling Ticker.info would occasionally result in a TypeError Exception at line 287.
2023-02-05 11:49:40 +10:00
ValueRaider
86b00091a9 Fix dumb bugs in price repair 2023-02-02 21:57:55 +00:00
ValueRaider
2a2928b4a0 Fix 'tradingPeriods' parsing when empty - 0.2.10b2 2023-02-01 13:31:54 +00:00
ValueRaider
d47133e5bf Dev version 0.2.10b1 2023-01-31 22:12:11 +00:00
ValueRaider
8f0c58dafa Dev version 0.2.10b0 2023-01-31 22:02:41 +00:00
ValueRaider
27a721c7dd Merge pull request #1380 from ranaroussi/fix/old-sqlite-error
Allow using sqlite3 < 3.8.2
2023-01-31 19:52:22 +00:00
ValueRaider
3e964d5319 Merge pull request #1383 from ranaroussi/fix/fast-info-prepost
Fix fast_info["previousClose"]
2023-01-31 19:51:46 +00:00
ValueRaider
84a31ae0b4 Merge pull request #1311 from ranaroussi/feature/prices-metadata-prune-prepost
Drop intraday intervals if in post-market but prepost=False
2023-01-31 19:50:00 +00:00
ValueRaider
891b533ec2 Drop intraday intervals if in prepost but prepost=False 2023-01-31 19:48:47 +00:00
ValueRaider
b9fb3e4979 Restore original download() tz handling: day/week/etc = ignore 2023-01-31 00:00:45 +00:00
ValueRaider
09342982a4 Add 'quoteType'. Improve handling tickers without trading 2023-01-30 23:53:06 +00:00
ValueRaider
da8c49011e fast_info: Fix previousClose & yearChange 2023-01-30 16:06:55 +00:00
ValueRaider
b805f0a010 Add another backup decrypt option 2023-01-29 23:09:45 +00:00
ValueRaider
5b0feb3d20 Fix tests 2023-01-29 16:53:26 +00:00
ValueRaider
ecbfc2957d bug_report: tighten language (again) 2023-01-29 13:58:02 +00:00
ValueRaider
e96248dec7 README: fix narrative ordering 2023-01-29 13:52:13 +00:00
ValueRaider
7d0045f03c README: simplify API overview with link to Wiki 2023-01-29 13:49:01 +00:00
ValueRaider
c3d7449844 Merge pull request #1289 from ranaroussi/fix/price-repair
Fix & improve price repair
2023-01-29 13:02:48 +00:00
ValueRaider
a4f11b0243 Fix price repair tests, remove unrelated changes 2023-01-29 13:01:54 +00:00
ValueRaider
1702fd0797 bug_report: tighten language 2023-01-29 00:54:27 +00:00
ValueRaider
464b3333d7 Allow using sqlite3 < 3.8.2 2023-01-29 00:34:46 +00:00
ValueRaider
685f2ec351 Merge branch 'dev' into fix/price-repair 2023-01-28 23:26:56 +00:00
ValueRaider
aad46baf28 price repair: Fix 'min_dt', add 'silent' mode 2023-01-28 23:14:28 +00:00
ValueRaider
a97db0aac6 README: add how-to for requests rate-limiting 2023-01-28 23:10:38 +00:00
ValueRaider
af5f96f97e Merge pull request #1368 from ranaroussi/fix/fast-info-camel-case
`fast_info` usability improvements
2023-01-28 22:28:42 +00:00
ValueRaider
a4bdaea888 fast_info: add camelCase, items() & values() 2023-01-28 22:27:51 +00:00
ValueRaider
ac5a9d2793 Merge pull request #1367 from ranaroussi/main
main -> dev
2023-01-27 22:09:59 +00:00
ValueRaider
b17ad32a47 Merge pull request #1366 from ranaroussi/doc/readme-explain-instability
README: comment on instability, tidy Ticker 'Quick start'
2023-01-27 18:31:32 +00:00
ValueRaider
af39855e28 README: comment on instability, tidy Ticker 'Quick start' 2023-01-27 17:36:25 +00:00
ValueRaider
ac6e047f0d Bump version to 0.2.9 2023-01-26 22:21:46 +00:00
ValueRaider
1e24337f29 Bump version to 0.2.8 2023-01-26 22:20:11 +00:00
ValueRaider
2cc82ae12f Merge pull request #1362 from ranaroussi/hotfix/fast-info-bugs
Ticker.fast_info: fix teething bugs
2023-01-26 22:03:06 +00:00
ValueRaider
d11f385049 Make fast_info JSON-serializable via toJSON() 2023-01-26 21:45:53 +00:00
ValueRaider
7377611e1f Add 'get(key, default)' to fast_info 2023-01-26 21:23:31 +00:00
ValueRaider
f3b5fb85c9 Remove exception raise from 'get_shares_full()' 2023-01-26 21:14:48 +00:00
ValueRaider
a4faef83ac 'fast_info' fixes: unusual symbols ; improve migration message ; 'regular_market_previous_close' 2023-01-26 21:02:18 +00:00
ValueRaider
e1184f745b Update yahoo-keys.txt 2023-01-26 17:06:03 +00:00
ValueRaider
fe630008e9 Bump version to 0.2.7 2023-01-26 17:03:00 +00:00
ValueRaider
b43072cf0a Merge pull request #1354 from ranaroussi/hotfix/rename-basic-info
Rename 'basic_info' -> 'fast_info'
2023-01-26 17:00:54 +00:00
ValueRaider
ad3f4cabc9 Improve 'get_shares_full()' error handling 2023-01-26 16:58:26 +00:00
ValueRaider
f70567872c Merge pull request #1353 from ranaroussi/hotfix/smart-decryption
Add decrypt key extraction from JS + GitHub backup
2023-01-26 16:44:23 +00:00
ValueRaider
a8ade72113 Rename 'basic_info' -> 'fast_info' ; Fix info tests 2023-01-26 16:36:25 +00:00
ValueRaider
1dcc8c9c8b Remove dead debug code 2023-01-26 14:57:15 +00:00
ValueRaider
dd5462b307 Add decrypt key extraction from JS + GitHub backup 2023-01-26 14:52:18 +00:00
ValueRaider
e39c03e8e3 Hardcode decrypt keys in GitHub for fix w/o PIP
`yfinance` will query this file via web request as a last resort. Avoids having to release a new PIP version just for a key update.
2023-01-26 14:20:03 +00:00
ValueRaider
9297504b84 Merge pull request #1346 from ranaroussi/main
main -> dev sync
2023-01-25 22:16:22 +00:00
ValueRaider
3971115ab9 Bump version to 0.2.6 2023-01-25 19:10:31 +00:00
ValueRaider
b5badbbc61 Merge pull request #1342 from ranaroussi/hotfix/basic_info
Fix 'Ticker.basic_info' lazy-loading
2023-01-25 19:09:37 +00:00
ValueRaider
ba8621f5be Fix Ticker.basic_info.keys() calling each method 2023-01-25 18:35:54 +00:00
ValueRaider
8e5c94a4eb Bump version to 0.2.5 2023-01-25 16:45:30 +00:00
ValueRaider
66a1c1a174 Merge pull request #1337 from ranaroussi/dev
dev -> main
2023-01-25 16:40:56 +00:00
ValueRaider
ab6214df79 Merge pull request #1336 from ranaroussi/hotfix/decryption
Hardcode decryption keys
2023-01-25 16:40:38 +00:00
ValueRaider
dc5d42c8e2 Add another key 2023-01-25 15:46:07 +00:00
ValueRaider
ab75495cd3 Hardcode decryption keys 2023-01-25 14:45:04 +00:00
ValueRaider
39c1ecc7a2 Improve price repair - reduce spam, improve data reliability
Extend 'reconstruct groups' to reduce Yahoo spam ; Extend fetch range to avoid first/last day irregularities ; Improve handling of 'max fetch days' Yahoo limit
2023-01-25 14:37:43 +00:00
ValueRaider
af7720668c Merge pull request #1328 from CollieIsCute/main
use dict comprehension to improve speed
2023-01-25 13:42:44 +00:00
Collie Tsai
9051fba601 use dict comprehension to improve speed 2023-01-25 21:15:54 +08:00
ValueRaider
03ea6acec0 Merge pull request #1317 from ranaroussi/feature/prune-info
`Ticker.basic_info` - fast but minimal alternative to `info[]`
2023-01-25 11:28:22 +00:00
ValueRaider
ddc93033d7 Reorder contents of bug_report.md 2023-01-23 11:53:00 +00:00
ValueRaider
eb6d830e2a Fix repair volume=0 ; Tidy code 2023-01-21 23:00:30 +00:00
ValueRaider
2b0ae5a6c1 Remove 'repair_intervals' 2023-01-21 16:58:45 +00:00
ValueRaider
1636839b67 Handle request to reconstruct 1m 2023-01-20 00:13:28 +00:00
ValueRaider
65b97d024b Improve reporting 2023-01-20 00:13:02 +00:00
ValueRaider
fb77d35863 Update README 2023-01-19 22:33:54 +00:00
ValueRaider
197d2968e3 Add 'repair_intervals', rename 'repair'->'repair_prices' 2023-01-19 22:19:16 +00:00
ValueRaider
7460dbea17 If reconstructing 1d interval with 1h, always request prepost 2023-01-19 22:18:46 +00:00
ValueRaider
b49fd797fc Fix & improve price repair
Fix repair calibration & volume=0 repair ; Extend repair to sub-hour ; Avoid attempting repair of mostly-NaN days
2023-01-19 22:18:46 +00:00
ValueRaider
6bd8fb2290 Improve test ; Add more keys to basic_info 2023-01-19 14:57:34 +00:00
ValueRaider
cd1e16ad9e Add test ; Fix 1y price stats 2023-01-19 00:37:17 +00:00
ValueRaider
3fd9ea2204 Remove more info[] keys - #2 2023-01-18 16:55:31 +00:00
ValueRaider
d5a1266cbe Remove more info[] keys 2023-01-17 20:13:32 +00:00
ValueRaider
89bbe8ad4c Override Ticker.basic_info __str__() 2023-01-17 19:49:42 +00:00
ValueRaider
e44c6f8b0e Add 'Ticker.basic_info' 2023-01-17 14:10:28 +00:00
ValueRaider
0ba810fda5 Improve 'history_metadata' formatting 2023-01-16 18:30:28 +00:00
ValueRaider
677bbfed8b Add Ticker.market_cap helper ; Tidy info[] blacklist 2023-01-16 11:23:35 +00:00
ValueRaider
97671b78dd Move info migrate msgs from 'is in' to '[]' 2023-01-14 23:11:02 +00:00
ValueRaider
2865c0df9f Prune info[] with migration instructions
Remove redundant keys from info[] that are better found elsewhere ; Print instructions if old keys accessed via InfoDictWrapper
2023-01-14 23:07:04 +00:00
ValueRaider
0c037ddd12 Bump version to 0.2.4 2023-01-14 22:58:53 +00:00
ValueRaider
3ee4674098 Merge pull request #1302 from ranaroussi/dev
dev -> main
2023-01-14 22:58:33 +00:00
ValueRaider
5d9a91da4a Improve 'get_shares_full()' error handling ; Minor fixes 2023-01-14 22:44:54 +00:00
ValueRaider
47c579ff22 Merge pull request #1297 from alexa-infra/fix-stores-decryption
Fix stores decrypt
2023-01-14 20:06:52 +00:00
ValueRaider
caf5cba801 Merge pull request #1301 from ranaroussi/feature/share-count
Feature/share count
2023-01-14 19:53:45 +00:00
ValueRaider
486c7894ce get_shares_full(): convert to pd.Series, add test 2023-01-14 17:32:54 +00:00
ValueRaider
db8a00edae get_shares_full(): remove caching, tidy API 2023-01-14 17:11:57 +00:00
ValueRaider
805523b924 Fix 'get_shares_full()' post-rebase 2023-01-14 16:58:58 +00:00
ValueRaider
32ab2e648d get_shares_full() set default range 1yr 2023-01-14 16:35:54 +00:00
ValueRaider
4d91ae740a Add date args to 'shares_full()' and caching 2023-01-14 16:35:54 +00:00
ValueRaider
05ec4b4312 Add full share count history via 'shares_full' 2023-01-14 16:35:51 +00:00
ValueRaider
cd2c1ada14 Improve decrypt key deduction 2023-01-14 15:41:33 +00:00
ValueRaider
4ca9642403 Ensure 'requests_cache' responses processed ; Improve naming 2023-01-14 14:20:40 +00:00
Alexey Vasilyev
b438f29a71 Fix decryption 2023-01-14 08:06:35 +01:00
ValueRaider
4db178b8d6 Merge pull request #1284 from ranaroussi/fix/financials-caching
Improve caching of financials data
2023-01-12 11:47:04 +00:00
ValueRaider
38637a9821 Merge pull request #1283 from DE0CH/ignore-tz-false
Change default value to ignore_tz to False
2023-01-08 12:45:00 +00:00
Deyao Chen
de8c0bdcdd Change default value to ignore_tz to False
Bring the behavior of download() to be the same as 0.1.77.
2023-01-08 11:47:13 +08:00
ValueRaider
fd35975cf9 Improve caching of financials data 2023-01-07 18:02:16 +00:00
ValueRaider
1495834a09 Merge pull request #1276 from gogog22510/main
Fix the database lock error in multithread download
2023-01-04 23:10:22 +00:00
ValueRaider
2a7588dead Tidy DB lock fix 2023-01-04 21:32:54 +00:00
gogog22510
051de748b9 Fix the database lock error in multithread download 2023-01-04 12:37:59 -05:00
ValueRaider
97adb30d41 Merge pull request #1262 from ranaroussi/main
Sync `main` -> `dev`
2022-12-20 20:42:10 +00:00
ValueRaider
eacfbc45c0 Bump version to 0.2.3 2022-12-20 11:57:04 +00:00
ValueRaider
8deddd7ee9 Make financials API '_' use consistent 2022-12-20 11:56:57 +00:00
ValueRaider
beb494b67e README: add small section on version 0.2 2022-12-20 11:37:16 +00:00
ValueRaider
e2948a8b48 Bump version to 0.2.2 2022-12-20 11:33:04 +00:00
ValueRaider
ff3d3f2f78 Restore 'financials' attribute (map to 'income_stmt') 2022-12-20 11:32:19 +00:00
ValueRaider
85783da515 README: update 'repair' doc 2022-12-19 23:30:29 +00:00
ValueRaider
9dbfad4294 Bump version to 0.2.1 2022-12-19 23:19:42 +00:00
ValueRaider
5e54b92efd Fix _reconstruct_intervals_batch() calibration bug 2022-12-19 18:09:06 +00:00
ValueRaider
cffdbd47b5 Merge pull request #1253 from Rogach/pr/decode-stores
decode encrypted root.App.main.context.dispatcher.stores
2022-12-19 12:29:57 +00:00
ValueRaider
f398f46509 Switch 'pycryptodome' -> 'cryptography' 2022-12-19 12:28:51 +00:00
ValueRaider
097c76aa46 Add 'pycryptodome' requirement 2022-12-18 13:26:12 +00:00
ValueRaider
a9da16e048 Fix get_json_data_stores() behaviour 2022-12-18 13:19:11 +00:00
Platon Pronko
8e5f0984af decode encrypted root.App.main.context.dispatcher.stores 2022-12-18 11:40:26 +04:00
ValueRaider
38b738e766 Bump version to 0.2.0rc5 2022-12-16 16:27:46 +00:00
ValueRaider
55772d30a4 Merge pull request #1245 from ranaroussi/dev
Merge dev -> main for release 0.2.0rc5
2022-12-16 16:25:36 +00:00
ValueRaider
382285cfd9 Remove hardcoded paths 2022-12-16 16:24:16 +00:00
ValueRaider
d2e5ce284e Merge pull request #1243 from ranaroussi/fix/financials-error-handling
Improve financials error handling
2022-12-16 16:20:25 +00:00
ValueRaider
88d21d742d Merge pull request #1244 from ranaroussi/fix/repair-100x
Fix '100x price' repair
2022-12-16 16:20:17 +00:00
ValueRaider
7a0356d47b Document financials get() methods 2022-12-16 16:19:37 +00:00
ValueRaider
a13bf0cd6c Hide divide-by-0 warnings 2022-12-16 15:05:38 +00:00
ValueRaider
7cacf233ce Improve financials error handling
Nicely intercept parse errors in get_json_data_stores() & _create_financials_table_old() ; Improve exception messages ; Fix typo 'YFiance'
2022-12-16 13:22:17 +00:00
ValueRaider
b48212e420 Repair-100x now tolerates zeroes 2022-12-14 21:16:16 +00:00
ValueRaider
f10f9970b2 Bump version to 0.2.0rc4 2022-12-13 22:12:23 +00:00
ValueRaider
96ff214107 Fix tests 2022-12-13 21:45:28 +00:00
ValueRaider
e7bf3607e8 Fix tests 2022-12-13 21:41:46 +00:00
ValueRaider
2883362a0e Merge pull request #1238 from ranaroussi/dev
Merge dev -> main for release 0.2.0rc3 (or official?)
2022-12-13 21:22:43 +00:00
ValueRaider
df7af507f0 Merge pull request #1233 from ranaroussi/revise-reqs
Raise reqs min versions (lxml, pandas)
2022-12-13 18:12:48 +00:00
ValueRaider
46dbed3e7e Merge pull request #1235 from ymyke/feature/add-history-metadata
Add `history_metadata` property
2022-12-13 18:09:14 +00:00
ValueRaider
46d5579caa Merge pull request #1236 from ranaroussi/feature/improve-reconstruction
Improve price repair
2022-12-13 17:28:21 +00:00
ValueRaider
11a3a9d457 Raise min lxml & pandas, sync all reqs lists 2022-12-13 15:25:34 +00:00
ValueRaider
6dca1eea96 Don't repair prices if can't calibrate 2022-12-13 14:47:27 +00:00
ymyke
85ef53c6bb Store _history_metadata earlier and use that attribute for further metadata access in the same function 2022-12-13 08:27:12 +01:00
ValueRaider
4c41ba0a50 Improve price repair
Minimise _reconstruct_intervals() #requests ; Refine when to repair NaNs
2022-12-12 16:43:24 +00:00
ymyke
6f60a78262 Add history_metadata property
Including test and README mention.

See also https://github.com/ranaroussi/yfinance/issues/1195.
2022-12-12 17:16:05 +01:00
ValueRaider
8f083818c3 Merge pull request #1232 from ranaroussi/fix/no-history-caching
If fetching price history ending in future, don't use cache
2022-12-10 21:13:39 +00:00
ValueRaider
791c845d23 Merge pull request #1194 from ranaroussi/feature/old-financials-backup
Serve old financials when new financials are missing
2022-12-10 21:13:09 +00:00
ValueRaider
aeea23229f Merge branch 'dev' into feature/old-financials-backup 2022-12-10 21:12:06 +00:00
ValueRaider
e91ffe4844 Replace 'fallback' with 'legacy' arg 2022-12-10 21:05:42 +00:00
ValueRaider
df9d456cf6 Merge pull request #1221 from ranaroussi/feature/financials-format-default
Default enable 'pretty' financials, explain in README
2022-12-10 19:44:13 +00:00
ValueRaider
4c89e8aefa Account for data delay ; Remove debug code ; Fix session test 2022-12-10 18:27:23 +00:00
ValueRaider
7ddce7f80b Update issue template - add note on Yahoo spam 2022-12-08 13:57:21 +00:00
ValueRaider
b3dbbc46e2 If fetching price history ending in future, don't use cache 2022-12-06 18:04:30 +00:00
ValueRaider
762d446661 Default enable 'pretty' financials, explain in README 2022-12-01 18:49:43 +00:00
ValueRaider
1aa3c3d9a8 Merge pull request #1220 from ranaroussi/feature/improve-repair-zero
Improve handling dividends without matching price interval
2022-12-01 17:14:59 +00:00
ValueRaider
0f6ad3290d Merge pull request #1217 from ranaroussi/fix/Yahoo-duplication-fix
Extend Yahoo duplication fix to intra-day
2022-12-01 17:14:41 +00:00
ValueRaider
e26a4c5a1c Improve handling dividends without matching price interval
Tolerate merging daily dividend event without matching prices interval (just append).
Move price-repair to after merge, to fix these missing prices intervals.
Improve bad-price detection & repair.
2022-12-01 17:11:05 +00:00
ValueRaider
d963e3fe1c Fix dev merge ; Fix financials fallback fetch 2022-12-01 15:47:37 +00:00
ValueRaider
0cd54486d0 Merge pull request #1216 from ymyke/fix/readme-several
Fix a couple of minor issues in README
2022-11-30 22:35:47 +00:00
ValueRaider
f93c3d76ce Extend Yahoo duplication fix to intra-day 2022-11-30 17:05:22 +00:00
ValueRaider
8bf7576b33 Merge pull request #1215 from fredrik-corneliusson/dev_verify_ticker_history_call
Test to verify ticker history request.
2022-11-29 23:11:46 +00:00
ymyke
2eae33bd33 Fix a couple of minor issues in README
- Typos in variable name
- `Ticker` doesn't support several tickers
- `Tickers` doesn't return named tuple
- "1m" in `download` would produce an error for longer timeframes, so
  changing the example to "5d"
2022-11-29 23:28:16 +01:00
Fredrik Corneliusson
5e333f53ee #1213 Added test asserting no harmful requests are added to history call. 2022-11-29 01:18:59 +01:00
ValueRaider
9c249a100f Merge pull request #1203 from ranaroussi/fix/capital-gains-perf-regression
Get quote type from metadata instead info[] -> faster
2022-11-28 18:13:29 +00:00
ValueRaider
0ee3d6d72d Merge pull request #1208 from fredrik-corneliusson/mydev
#1207 Fixed regression issue with Python < 3.9
2022-11-27 19:23:33 +00:00
ValueRaider
3c218b81a3 Merge pull request #1210 from fredrik-corneliusson/mydev_1209
#1209 Fixed pretty format alters cached dataframe
2022-11-27 19:22:06 +00:00
ValueRaider
80dc0e8488 Merge branch 'dev' into feature/old-financials-backup 2022-11-27 19:19:03 +00:00
ValueRaider
4064ec53c3 Move financials fallback logic into Ticker 2022-11-27 19:15:35 +00:00
Fredrik Corneliusson
37ac9bd1d5 #1209 Fixed pretty format alters cached dataframe 2022-11-27 19:25:08 +01:00
Fredrik Corneliusson
e234b8c5ab #1207 Fixed regression issue with Python < 3.9 2022-11-27 19:00:45 +01:00
ValueRaider
efc56c43c2 Improve bug issue template - request version info 2022-11-27 12:50:56 +00:00
ValueRaider
50de008820 Merge pull request #1193 from ranaroussi/fix/financials-formatting
Fix financials formatting
2022-11-26 21:40:30 +00:00
ValueRaider
d7baa0713e Get quote type from metadata instead info[] -> faster 2022-11-25 22:18:09 +00:00
ValueRaider
3b19ef12bc camel2title(): restrict acceptable inputs 2022-11-24 20:36:00 +00:00
ValueRaider
dfb15e6778 Unit tests for financials formatting 2022-11-23 18:16:51 +00:00
ValueRaider
379b87d925 Moved financials formatting up into get()
Moved financials formatting up into get(), controlled by new 'pretty' argument. Extend camel2title() to accept different separator char and to preserve acronyms case e.g. 'EBIT'
2022-11-23 17:45:45 +00:00
ValueRaider
b856041b53 Merge pull request #1177 from ranaroussi/fix/dst-nonexistent
Fix localizing midnight when non-existent (DST) #1174
2022-11-22 22:19:40 +00:00
ValueRaider
b3b36c5cc9 Restore old financials as backup if new missing 2022-11-22 22:17:07 +00:00
ValueRaider
ab1476c0d1 Restore financials nesting code (commented) 2022-11-22 21:46:26 +00:00
ValueRaider
566a38b432 Fix financials index formatting 2022-11-22 21:46:04 +00:00
ValueRaider
96e4532a9d Merge pull request #1183 from fredrik-corneliusson/dev
Made fetching earnings_dates faster
2022-11-21 17:11:33 +00:00
ValueRaider
bd3569367e Bugfix for PR #1166 2022-11-21 17:04:15 +00:00
Fredrik Corneliusson
20680b0e38 Lowered get earnings_dates limit and removed earnings_history API.
earnings_history/get_earnings_history was redundant as it was an incomplete implementation of get_earnings_dates().
2022-11-21 17:48:20 +01:00
ValueRaider
44e8d2b46b Merge pull request #1166 from thirumalairajr/feature/add_capital_gains
Add support for capital_gains data
2022-11-21 13:41:20 +00:00
Thirumalai Raj R
80c659be71 Addressing PR comments 2022-11-21 17:12:15 +05:30
Fredrik Corneliusson
06640102f8 Made fetching earnings_dates faster
Avoid unnecessary request when fetching earnings_date.
Added support to limit argument to only fetch as many as needed.
2022-11-20 02:19:16 +01:00
ValueRaider
a0c47c9944 Merge pull request #1179 from fredrik-corneliusson/dev
Add glob try/except in threaded implementation.
2022-11-19 17:47:46 +00:00
ValueRaider
744e70ffff Add issue template for 'feature request' 2022-11-19 13:46:06 +00:00
Fredrik Corneliusson
e6211896f7 Add glob try/except in threaded implementation.
Needed as the current thread implementation breaks if an exception is raised.
2022-11-19 12:42:49 +01:00
Thirumalai Raj R
ca27d070f0 Migrating to get_info function 2022-11-17 19:39:57 +05:30
Thirumalai Raj R
82b99b5c9e Merge branch 'feature/add_capital_gains' of github.com:thirumalairajr/yfinance into feature/add_capital_gains 2022-11-17 19:28:40 +05:30
Thirumalai Raj R
c5c1567321 Handle non-existent columns while downloading 2022-11-17 19:28:13 +05:30
Thirumalai Raj R
1adc908788 Merge branch 'dev' into feature/add_capital_gains 2022-11-16 18:36:22 +05:30
ValueRaider
2970d9460f Fix localizing midnight when non-existent (DST) #1174 2022-11-16 12:34:36 +00:00
ValueRaider
f0b5db234a Merge pull request #1154 from fredrik-corneliusson/refactor_base_py
Major refactoring
2022-11-15 21:58:06 +00:00
Fredrik Corneliusson
c6f760e61c Fixed #1172 - exception if tz cache file was empty 2022-11-15 22:43:58 +01:00
Thirumalai Raj R
6067d2a590 Addressing PR review comments 2022-11-15 14:23:31 +05:30
Fredrik Corneliusson
c56e3496db Align requirements.txt file with setup.py package dependencies. 2022-11-14 22:03:36 +01:00
ValueRaider
55fd565ef0 Update bug_report.md - ask 'Does Yahoo have data?' 2022-11-14 20:45:30 +00:00
Thirumalai Raj R
231d985c82 Merge branch 'feature/add_capital_gains' of github.com:thirumalairajr/yfinance into feature/add_capital_gains 2022-11-14 15:01:42 +05:30
Thirumalai Raj R
0f433d7e5d Add capital gains data only for MutualFund and ETF 2022-11-14 15:01:27 +05:30
Thirumalai Raj R
e188c7e41f Merge branch 'dev' into feature/add_capital_gains 2022-11-14 14:08:08 +05:30
Fredrik Corneliusson
fa7d743826 Do persist requests_cache in tests. 2022-11-13 21:26:34 +01:00
Fredrik Corneliusson
f4b3348c8e Continued refactoring
Fix for #1171
Change default start to 1900-01-01
Refactored financials to remove unnecessary requests
Dividends not working on windows (DEV)
Add typehints to API
2022-11-13 20:27:16 +01:00
Fredrik Corneliusson
724118a671 Major refactoring
Made fundamentals, quote, analysis and holders into lazy scraped modules in order to improve performance and code maintainability.
2022-11-13 17:47:16 +01:00
ValueRaider
ea95d718ee Merge pull request #1169 from ppeloton/fix-issue-980
Fixing issue 980 by changing default timestamp for start parameter in…
2022-11-13 11:37:42 +00:00
ppeloton
9ba3d5a1ea Fixing issue 980 by changing default timestamp for start parameter in base.py 2022-11-13 08:44:43 +02:00
ValueRaider
b67372e4eb Version 0.2.0rc2 2022-11-12 21:28:22 +00:00
ValueRaider
77107c6ea0 Merge pull request #1168 from ranaroussi/dev
Merge dev -> main for release 0.2.0rc2
2022-11-12 21:20:34 +00:00
Thirumalai Raj R
1ed4b4b65d For ETFs & Mutual Funds, add capitalGains 2022-11-11 20:57:39 +05:30
ValueRaider
2a0e14962e Merge pull request #1157 from fredrik-corneliusson/fix-fundamentals-regression-bug
Fix fundamentals regression bug
2022-11-11 11:51:50 +00:00
ValueRaider
43aae83a1b Merge pull request #1161 from ranaroussi/fix/events-merge
Fix merging events with day/wk/mth prices
2022-11-10 21:50:56 +00:00
Fredrik Corneliusson
fff8e9145d Fixed #1160 2022-11-10 19:17:18 +01:00
ValueRaider
4f1e7a49c4 Fix merging events with day/wk/mth prices 2022-11-10 14:15:18 +00:00
Fredrik Corneliusson
357da735ea Fix fundamentals regression bug
The wrong data was returned for balance_sheet and cashflow
2022-11-10 01:51:15 +01:00
ValueRaider
b7b3b4975d Merge pull request #1148 from ranaroussi/feature/improve-repair-zero
Repair: add zero-price repair + refactor
2022-11-08 15:13:28 +00:00
ValueRaider
7d5fdb6f83 Merge branch 'dev' into feature/improve-repair-zero 2022-11-08 15:10:41 +00:00
ValueRaider
1c2ed86313 Repair: implement _fix_zero_prices(), refactor _fix_unit_mixups(), improve ratio calc 2022-11-08 15:04:59 +00:00
ValueRaider
23e8423b8b Merge pull request #1147 from fredrik-corneliusson/request_optimization
Request optimization
2022-11-08 14:44:55 +00:00
Fredrik Corneliusson
c7cf4378f6 Lowered lru_cache size and made cache_info and cache_clear work on lru_cached methods. 2022-11-08 01:36:28 +01:00
Fredrik Corneliusson
5bfbec5df0 Decreased default cache_maxsize for lru_cache after some investigation of memory usage. Also fixed warning about wrong type used for dataframe index. 2022-11-07 10:29:42 +01:00
Fredrik Corneliusson
a775669ac5 Tried to clean up the temp folder creation in test. 2022-11-07 00:00:55 +01:00
Fredrik Corneliusson
f96dfc25c2 Merge branch 'dev' into request_optimization
# Conflicts:
#	tests/ticker.py
#	yfinance/base.py
2022-11-06 23:31:14 +01:00
fredrik-corneliusson
f6c311815d Merge branch 'ranaroussi:main' into request_optimization 2022-11-06 22:47:57 +01:00
Fredrik Corneliusson
112fd5cf64 Added more tests for Ticker and missing dependencies. 2022-11-06 20:49:36 +01:00
Fredrik Corneliusson
2be718700f Fixed regression bug with balance_sheet and added test for it. 2022-11-06 20:16:10 +01:00
ValueRaider
080d33f597 Fix typo in #1140 2022-11-06 19:00:28 +00:00
ValueRaider
c248b422da Merge pull request #1140 from ranaroussi/fix/bad-ticker-handling
Improve bad ticker handling ; Remove redundant get_earnings_history()
2022-11-06 18:47:11 +00:00
ValueRaider
b050692ee4 Improve bad ticker handling ; Remove redundant get_earnings_history() 2022-11-06 18:30:05 +00:00
Fredrik Corneliusson
2fed55a0d1 Improved TestTickerHolders test. 2022-11-06 19:14:51 +01:00
Fredrik Corneliusson
438f512f47 Cleaned up .gitignore 2022-11-06 17:01:09 +01:00
Fredrik Corneliusson
157b45269d Fixed regression in PR and cleaned up .gitignore 2022-11-06 16:59:06 +01:00
ValueRaider
9b169e60fb Merge pull request #1143 from ranaroussi/fix/dst-bugfix
Fix the fixes for Yahoo data issues (DST, weekly-2-rows) + tests
2022-11-06 13:48:53 +00:00
ValueRaider
1b439c4af6 Fix the fixes for Yahoo data issues (DST, weekly-2-rows) + tests 2022-11-06 13:47:34 +00:00
Fredrik Corneliusson
2cc3cbb1e8 Removed extra requests logging used for debugging. 2022-11-06 14:06:39 +01:00
Fredrik Corneliusson
743f3acb87 Removed unused get_html method. 2022-11-06 13:55:44 +01:00
Fredrik Corneliusson
9f9f7b00d8 Revert to require a pandas version that supports 3.6.1 2022-11-06 13:50:21 +01:00
Fredrik Corneliusson
07e19f187a Dropped Python versions before 3.6 from package support metadata 2022-11-06 13:34:58 +01:00
ValueRaider
45169d9ff4 Merge pull request #1144 from fredrik-corneliusson/dev
Fixed some of the formatting errors reported by PyCharm as well as simplified some code constructs.
2022-11-06 12:27:10 +00:00
Fredrik Corneliusson
05520ee108 Have one place to retrieve data in order to ease caching and speed up operations and reduce code duplication. Needs Python 3.6 2022-11-06 13:26:52 +01:00
Fredrik Corneliusson
e1dec42950 Fix regression in PR #1144 2022-11-06 13:23:44 +01:00
Fredrik Corneliusson
369cbc41e5 Merge remote-tracking branch 'origin/dev' into dev
# Conflicts:
#	yfinance/base.py
2022-11-05 13:46:41 +01:00
ValueRaider
4d4c5c1819 Merge pull request #1138 from ranaroussi/patch/peg-ratio-trailing
Move get 'trailingPegRatio' into _get_info(), simplify & optimise
2022-11-02 15:19:43 +00:00
ValueRaider
028334de92 Merge pull request #1058 from PlanetNamekTech/patch-1
Update README.md with new notation
2022-11-02 15:16:55 +00:00
ValueRaider
2b1d5c848c Move get 'trailingPegRatio' into _get_info(), simplify & optimise 2022-10-31 23:51:40 +00:00
Fredrik Corneliusson
fe5a9d70e5 Removed unnecessary escaping in regexp. Formatted code and updated if statements to make sure variables used are always defined. 2022-10-29 16:43:45 +02:00
Fredrik Corneliusson
e89fe6357b Simplified the code and changed **kwargs to normal keyword arguments. 2022-10-29 13:32:32 +02:00
ValueRaider
bcd6e5b11d Merge pull request #1128 from ranaroussi/fix/financials-data
Fix financials tables
2022-10-28 14:30:41 +01:00
ValueRaider
e83cc74800 Merge branch 'dev' into fix/financials-data 2022-10-28 14:26:07 +01:00
ValueRaider
9e529f3c8f Revert version bump 2022-10-28 14:18:13 +01:00
ValueRaider
e29df56253 Financials - reorder rows to match website, disable MultiIndex 2022-10-28 14:16:54 +01:00
Fredrik Corneliusson
f6a0979916 Simplified the code and changed **kwargs to normal keyword arguments. 2022-10-28 02:19:59 +02:00
ValueRaider
e3d2c5d6d7 Merge pull request #1064 from Jossan84/main
Bugfix: Get logo url when no website exists
2022-10-27 22:29:12 +01:00
Fredrik Corneliusson
a836f24144 Fixed some of the formatting errors reported by PyCharm IDE 2022-10-27 00:32:58 +02:00
ValueRaider
fb5c67b3bd Bump version to 0.2.0rc1 - big update 2022-10-26 22:39:41 +01:00
ValueRaider
3f33aa0377 Merge pull request #1119 from ranaroussi/dev
Improve error handling
2022-10-26 16:23:59 +01:00
ValueRaider
ecdc36ab8e Merge pull request #1118 from fredrik-corneliusson/dev
Better handling of error from yahoo API, added missing pytz dependency and fixed if statement syntax warnings
2022-10-25 21:47:43 +01:00
Fredrik Corneliusson
fbc5de153a Handle error from yahoo api 2022-10-25 21:52:09 +02:00
Fredrik Corneliusson
e4a228b830 Some fixes and better debug if failing to fetch timezone from ticker. 2022-10-25 21:36:42 +02:00
Fredrik Corneliusson
3cee66dea7 Some fixes and better debug if failing to fetch timezone from ticker. 2022-10-25 21:22:45 +02:00
ValueRaider
bec5b38189 Merge pull request #1117 from ranaroussi/dev
Merge all dev updates into main
2022-10-25 18:13:13 +01:00
ValueRaider
f5973b2c89 Merge branch 'main' into dev 2022-10-25 17:49:02 +01:00
ValueRaider
edb911b913 Pre-emptive ambiguous DST fix 2022-10-25 17:42:44 +01:00
ValueRaider
6117b0a042 Fix syntax error 2022-10-25 16:56:32 +01:00
ValueRaider
5cb5484a9a Fix tests.ticker to use new cache API 2022-10-25 14:16:14 +01:00
ValueRaider
4e33ddf615 Merge pull request #1113 from fredrik-corneliusson/dev
Fix cache error on read only system #1108
2022-10-25 14:14:08 +01:00
ValueRaider
6d87f3d689 Fix PR merge 2022-10-25 14:10:23 +01:00
ValueRaider
b30b97fa36 Merge pull request #1116 from ranaroussi/fix/outlier-repair-bugfixes
Fix price repair ; Improve repair test
2022-10-25 14:05:20 +01:00
ValueRaider
6253e1d8a0 Merge pull request #1112 from ranaroussi/fix/get-tz-performance
Improve performance of fetching Ticker timezone
2022-10-25 14:03:04 +01:00
ValueRaider
2dce6a705c Remove debug code 2022-10-25 14:01:44 +01:00
ValueRaider
df11fcdb37 Improve Ticker._fetch_ticker_tz() ; Change timeout default to 10 2022-10-25 13:59:51 +01:00
fredrik-corneliusson
567e2cf0d3 Merge branch 'ranaroussi:dev' into dev 2022-10-25 01:00:47 +02:00
Fredrik Corneliusson
3d6e88857b Merge remote-tracking branch 'origin/dev' into dev 2022-10-25 01:00:13 +02:00
Fredrik Corneliusson
59af19d84c Fix cache error on read only system #1108 2022-10-25 00:59:05 +02:00
ValueRaider
e07191b627 Fix price repair ; Improve repair test 2022-10-24 23:55:16 +01:00
ValueRaider
2623ba967d Simplify Ticker._fetch_ticker_tz() - 2 2022-10-24 13:46:39 +01:00
ValueRaider
fe1c705e24 Simplify Ticker._fetch_ticker_tz() 2022-10-24 13:45:25 +01:00
ValueRaider
9315f7b61d Add Ticker._fetch_ticker_tz() for faster tz fetch 2022-10-24 13:34:57 +01:00
ValueRaider
f76c788881 Remove debug print 2022-10-24 11:21:56 +01:00
ValueRaider
561f56c9f9 Merge pull request #1110 from ranaroussi/feature/outlier-repair
Feature - repair 100x price errors
2022-10-24 00:16:25 +01:00
ValueRaider
cf795ea0c7 Merge pull request #1109 from fredrik-corneliusson/dev
Fix for #1076
2022-10-24 00:03:40 +01:00
ValueRaider
643536b53b Fix '_fix_unit_mixups()' when data missing split-adjustment 2022-10-23 23:46:33 +01:00
fredrik-corneliusson
ae8a5ff996 Merge branch 'ranaroussi:dev' into dev 2022-10-23 23:12:21 +02:00
Fredrik Corneliusson
d01d378c8d Small cleanup to ease finding bug #1076. Begun by getting rid of multiple calls to self.info (get_info). 2022-10-23 22:37:07 +02:00
ValueRaider
9e0152aae4 Merge pull request #1105 from fredrik-corneliusson/dev
Fix and improve timezone cache concurrency
2022-10-23 16:43:53 +01:00
Fredrik Corneliusson
6c21c1994e Fix bug, create cache directory if it does not exists. 2022-10-23 15:27:41 +02:00
Fredrik Corneliusson
d24a25f579 Add missing typehint 2022-10-23 13:59:48 +02:00
Fredrik Corneliusson
422a50672d Lazy init of cache db and added migration of data from old CSV cache. 2022-10-23 13:43:40 +02:00
ValueRaider
6e09410c7d Improve repair feedback msg 2022-10-23 00:03:23 +01:00
ValueRaider
3c51687351 Add arg history(repair=False) to fix $/cents £/p mixups 2022-10-22 23:58:20 +01:00
Fredrik Corneliusson
783df54978 Bugfix, do not set tz in cache if it is None, just delete it. 2022-10-22 23:56:50 +02:00
Fredrik Corneliusson
c76bf0128f Improve timezone cache to make it more reliable when using threads by using SQLLite. 2022-10-22 23:30:48 +02:00
ValueRaider
33f57ac002 Merge pull request #1104 from ranaroussi/feature/improve-err-msgs
Improve error message handling
2022-10-22 16:30:47 +01:00
ValueRaider
c0e1536179 Improve error message handling
Add error check for 'period' ; simplify err-msg handling ; new arg 'raise_errors' to control print-vs-Exception
2022-10-21 23:36:37 +01:00
ValueRaider
303e0ea655 Merge pull request #1102 from ranaroussi/fix/price-tz-and-events
Various fixes to price data
2022-10-21 22:19:11 +01:00
ValueRaider
40424b71a6 Fix test 'test_intraDayWithEvents' 2022-10-21 17:26:15 +01:00
ValueRaider
b018f917a9 Port in: 'Fix when Yahoo returns price=NaNs on dividend day' 2022-10-21 17:21:19 +01:00
ValueRaider
28e50946ca Fix Ticker.dividends property 2022-10-21 15:44:36 +01:00
ValueRaider
841b485b1d Drop out-of-date-range events 2022-10-21 15:37:51 +01:00
ValueRaider
e842a9d657 Event-merge fixes: intra-day, weely, lost tz, 'test_intraDayWithEvents' 2022-10-21 15:26:59 +01:00
ValueRaider
0f14728591 Add test 'test_tz_dst_ambiguous' 2022-10-21 15:26:45 +01:00
ValueRaider
69dfe325ae Add tz to daily price data 2022-10-21 12:54:48 +01:00
ValueRaider
f20aa9a875 Merge pull request #1099 from ranaroussi/feature/improve-tz-cache
Improve timezone cache
2022-10-21 10:31:01 +01:00
ValueRaider
5707c1aa65 Merge branch 'fix/download-timezones' into dev 2022-10-21 10:16:27 +01:00
ValueRaider
053e0b9abb Port in @git-shogg fix, & fix typos 2022-10-20 22:24:24 +01:00
ValueRaider
730afda4a7 Fix financials placeholders 2022-10-20 22:19:08 +01:00
ValueRaider
1e7f4a9a91 Strengthen tz-cache against bad/corrupt values - more 2022-10-20 22:09:37 +01:00
ValueRaider
37c36549e4 Add mutex to tz-cache update 2022-10-20 22:01:08 +01:00
ValueRaider
bda339b170 Strengthen tz-cache against bad/corrupt values 2022-10-20 21:59:20 +01:00
ValueRaider
f5995161ed Optimise TZ cache indexing 2022-10-20 21:54:58 +01:00
ValueRaider
6e96a1a8e6 Refactor properly ; Rename some new properties 2022-10-17 16:40:12 +01:00
ValueRaider
68b8671cea Merge pull request #776 from git-shogg/main
Enhanced the detail of the annual financial statements and added ability to check the "Analysis" url.
2022-10-17 12:59:51 +01:00
Stephen Hogg
3b8114c135 Functions to minimize get_fundamentals. _DEV tests removed. 2022-10-17 21:02:58 +10:00
Stephen Hogg
d65391b798 Merged (refactored). Quarterlies updates. Multi-Index Inlcluded. 2022-10-16 17:34:23 +10:00
Stephen Hogg
6c4da51519 Remediated missing annual table line items flagged by @ValueRaider. 2022-10-15 11:22:39 +10:00
ValueRaider
4734e92090 Merge pull request #1070 from ranaroussi/fix/weekly-prices
Fix weekly/monthly prices across 2 rows
2022-10-14 23:18:59 +01:00
ValueRaider
5fdf2463e9 Merge branch 'dev' into fix/weekly-prices 2022-10-14 23:18:15 +01:00
ValueRaider
c679551faa Add unittest for duplication fix 2022-10-14 23:15:13 +01:00
ValueRaider
fdf52ac360 Merge pull request #1086 from ranaroussi/fix/events-merge
Fix merging pre-market events with min/hour prices
2022-10-14 14:08:35 +01:00
ValueRaider
94ad0bd955 Fix merging pre-market events with min/hour prices 2022-10-12 22:41:10 +01:00
ValueRaider
51c0ea0050 Enhance recent unittest 2022-10-10 15:37:55 +01:00
ValueRaider
3401d4dbe7 Merge pull request #1069 from ranaroussi/fix/events-merge
Fix merging of dividends/splits with prices
2022-10-10 14:01:31 +01:00
ValueRaider
a724585552 Tidy syntax 2022-10-10 14:00:10 +01:00
ValueRaider
1c85433cc0 Add unittest for div/splits merging 2022-10-10 13:58:17 +01:00
ValueRaider
34e1b2f157 Add new time-series functions and compare against new scraping 2022-10-09 17:20:07 +01:00
Value Raider
c80bfc0417 Manually merge pull request #776 - Fix & enhance annual financials 2022-10-08 20:31:42 +01:00
ValueRaider
5c0b2bbaa3 Fix weekly/monthly prices across 2 rows 2022-10-02 18:26:05 +01:00
ValueRaider
7d45a6709a Fix merging of dividends/splits with prices 2022-10-02 18:20:11 +01:00
Jose Manuel
42e5751705 Bugfix: Get logo url when no website exists 2022-09-19 13:54:56 +02:00
PlanetNamekTech
2ff2c57dcf Update README.md with new notation
Multiple ticker objects section doesn't seem to work with dot notation when accessing ticker.
2022-09-09 18:38:24 -07:00
Stephen Hogg
3f23c067f9 Updated to ensure .info is brought in correctly. 2021-08-08 13:46:53 +10:00
Stephen Hogg
7a395c37e9 Updated base with some exceptions and utils. 2021-07-18 20:51:33 +10:00
Stephen Hogg
e7c55bbdec Updated to resolve Travis CI Build Fail. 2021-07-18 12:37:15 +10:00
Stephen Hogg
87dc9fb345 Potential issues flagged by CodeFactor updated. Believe that the warnings with regards to "statement seems to have no effect" is because the import on this python file is yfinance (master version 1.63). 2021-07-18 12:16:42 +10:00
Stephen Hogg
81a0a4e665 Updated to align with the yfinance upstream main. 2021-07-18 11:02:41 +10:00
Stephen Hogg
4774485477 Merge https://github.com/ranaroussi/yfinance into main 2021-07-18 10:56:45 +10:00
Stephen Hogg
c3da55f2e4 Finished updating all of the required updates. 2021-07-12 19:34:18 +10:00
Stephen Hogg
a1769e4fe1 Added self._income_statement, once happy this new dataframe should replace self._financials. Further work required to understand if there is the opportunity to also do this for balance sheet and cf statement. 2021-07-10 10:48:15 +10:00
Stephen Hogg
cb31036153 Updated to include analyst price target forecasts. 2021-07-08 17:11:10 +10:00
Stephen Hogg
da2672f338 Updated README to include additional functions. 2021-07-08 16:47:47 +10:00
Stephen Hogg
91f4891475 Added some functions to enable us to grab key details from the analysis section of yahoo finance: current_recommendations, revenue_forecasts and earnings_forecasts. 2021-07-07 20:33:35 +10:00
Stephen Hogg
883b7f0775 Updated the utils get_json function to standardize the return. This will enable us to see other store types from the base.py file. 2021-07-07 18:50:10 +10:00
27 changed files with 5218 additions and 849 deletions

View File

@@ -7,14 +7,37 @@ assignees: ''
---
*** READ BEFORE POSTING ***
# IMPORTANT
Before posting an issue - please upgrade to the latest version and confirm the issue/bug is still there.
If you want help, you must read this first and follow the instructions.
### Are you up-to-date?
Upgrade to the latest version and confirm the issue/bug is still there.
Upgrade using:
`$ pip install yfinance --upgrade --no-cache-dir`
Bug still there? Delete this content and submit your bug report here and provide the following, as best you can:
Confirm by running:
- Simple code that reproduces your problem
- The error message
`import yfinance as yf ; print(yf.__version__)`
and comparing against [PIP](https://pypi.org/project/yfinance/#history).
### Does Yahoo actually have the data?
Are you spelling the ticker *exactly* the same as Yahoo?
Then visit `finance.yahoo.com` and confirm they have the data you want. Maybe your ticker was delisted, or your expectations of `yfinance` are wrong.
### Are you spamming Yahoo?
Yahoo Finance's free service is rate-limited depending on request type - roughly 60/minute for prices, 10/minute for info. Once the limit is hit, Yahoo can delay, block, or return bad data. This is not a `yfinance` bug.
### Still think it's a bug?
Delete this default message (all of it) and submit your bug report here, providing the following as best you can:
- Simple code that reproduces your problem, that we can copy-paste-run
- Exception message with full traceback, or proof `yfinance` returning bad data
- `yfinance` version and Python version
- Operating system type

View File

@@ -0,0 +1,14 @@
---
name: Feature request
about: Request a new feature
title: ''
labels: ''
assignees: ''
---
**Describe the problem**
**Describe the solution**
**Additional context**

7
.gitignore vendored
View File

@@ -9,3 +9,10 @@ build/
*.html
*.css
*.png
# Environments
.env
.venv
env/
venv/
ENV/

View File

@@ -1,6 +1,103 @@
Change Log
===========
0.2.12
------
Disable annoying 'backup decrypt' msg
0.2.11
------
Fix history_metadata accesses for unusual symbols #1411
0.2.10
------
General
- allow using sqlite3 < 3.8.2 #1380
- add another backup decrypt option #1379
Prices
- restore original download() timezone handling #1385
- fix & improve price repair #1289 2a2928b 86d6acc
- drop intraday intervals if in post-market but prepost=False #1311
Info
- fast_info improvements:
- add camelCase keys, add dict functions values() & items() #1368
- fix fast_info["previousClose"] #1383
- catch TypeError Exception #1397
0.2.9
-----
- Fix fast_info bugs #1362
0.2.7
-----
- Fix Yahoo decryption, smarter this time #1353
- Rename basic_info -> fast_info #1354
0.2.6
-----
- Fix Ticker.basic_info lazy-loading #1342
0.2.5
-----
- Fix Yahoo data decryption again #1336
- New: Ticker.basic_info - faster Ticker.info #1317
0.2.4
-----
- Fix Yahoo data decryption #1297
- New feature: 'Ticker.get_shares_full()' #1301
- Improve caching of financials data #1284
- Restore download() original alignment behaviour #1283
- Fix the database lock error in multithread download #1276
0.2.3
-----
- Make financials API '_' use consistent
0.2.2
-----
- Restore 'financials' attribute (map to 'income_stmt')
0.2.1
-----
Release!
0.2.0rc5
--------
- Improve financials error handling #1243
- Fix '100x price' repair #1244
0.2.0rc4
--------
- Access to old financials tables via `get_income_stmt(legacy=True)`
- Optimise scraping financials & fundamentals, 2x faster
- Add 'capital gains' alongside dividends & splits for ETFs, and metadata available via `history_metadata`, plus a bunch of price fixes
For full list of changes see #1238
0.2.0rc2
--------
Financials
- fix financials tables to match website #1128 #1157
- lru_cache to optimise web requests #1147
Prices
- improve price repair #1148
- fix merging dividends/splits with day/week/monthly prices #1161
- fix the Yahoo DST fixes #1143
- improve bad/delisted ticker handling #1140
Misc
- fix 'trailingPegRatio' #1138
- improve error handling #1118
0.2.0rc1
--------
Jumping to 0.2 for this big update. 0.1.* will continue to receive bug-fixes
- timezone cache performance massively improved. Thanks @fredrik-corneliusson #1113 #1112 #1109 #1105 #1099
- price repair feature #1110
- fix merging of dividends/splits with prices #1069 #1086 #1102
- fix Yahoo returning latest price interval across 2 rows #1070
- optional: raise errors as exceptions: raise_errors=True #1104
- add proper unit tests #1069
0.1.81
------
- Fix unhandled tz-cache exception #1107

190
README.md
View File

@@ -42,6 +42,11 @@ Yahoo! finance API is intended for personal use only.**
---
## News [2023-01-27]
Since December 2022 Yahoo has been encrypting the web data that `yfinance` scrapes for non-market data. Fortunately the decryption keys are available, although Yahoo moved/changed them several times hence `yfinance` breaking several times. `yfinance` is now better prepared for any future changes by Yahoo.
Why is Yahoo doing this? We don't know. Is it to stop scrapers? Maybe, so we've implemented changes to reduce load on Yahoo. In December we rolled out version 0.2 with optimised scraping. Then in 0.2.6 we introduced `Ticker.fast_info`, providing much faster access to some `info` elements (e.g. price stats) wherever possible, and forcing users to switch (sorry, but we think it is necessary). `info` will continue to exist for as long as there are elements without a fast alternative.
## Quick Start
### The Ticker module
@@ -53,38 +58,45 @@ import yfinance as yf
msft = yf.Ticker("MSFT")
# get stock info
# get all stock info (slow)
msft.info
# fast access to subset of stock info (opportunistic)
msft.fast_info
# get historical market data
hist = msft.history(period="max")
hist = msft.history(period="1mo")
# show actions (dividends, splits)
# show meta information about the history (requires history() to be called first)
msft.history_metadata
# show actions (dividends, splits, capital gains)
msft.actions
# show dividends
msft.dividends
# show splits
msft.splits
msft.capital_gains # only for mutual funds & etfs
# show financials
msft.financials
msft.quarterly_financials
# show share count
# - yearly summary:
msft.shares
# - accurate time-series count:
msft.get_shares_full(start="2022-01-01", end=None)
# show major holders
msft.major_holders
# show institutional holders
msft.institutional_holders
# show balance sheet
# show financials:
# - income statement
msft.income_stmt
msft.quarterly_income_stmt
# - balance sheet
msft.balance_sheet
msft.quarterly_balance_sheet
# show cashflow
# - cash flow statement
msft.cashflow
msft.quarterly_cashflow
# see `Ticker.get_income_stmt()` for more options
# show holders
msft.major_holders
msft.institutional_holders
msft.mutualfund_holders
# show earnings
msft.earnings
@@ -95,11 +107,18 @@ msft.sustainability
# show analysts recommendations
msft.recommendations
msft.recommendations_summary
# show analysts other work
msft.analyst_price_target
msft.revenue_forecasts
msft.earnings_forecasts
msft.earnings_trend
# show next event (earnings, etc)
msft.calendar
# show all earnings dates
# Show future and historic earnings dates, returns at most next 4 quarters and last 8 quarters by default.
# Note: If more are needed use msft.get_earnings_dates(limit=XX) with increased limit argument.
msft.earnings_dates
# show ISIN code - *experimental*
@@ -128,37 +147,24 @@ msft.history(..., proxy="PROXY_SERVER")
msft.get_actions(proxy="PROXY_SERVER")
msft.get_dividends(proxy="PROXY_SERVER")
msft.get_splits(proxy="PROXY_SERVER")
msft.get_capital_gains(proxy="PROXY_SERVER")
msft.get_balance_sheet(proxy="PROXY_SERVER")
msft.get_cashflow(proxy="PROXY_SERVER")
msft.option_chain(..., proxy="PROXY_SERVER")
...
```
To use a custom `requests` session (for example to cache calls to the
API or customize the `User-agent` header), pass a `session=` argument to
the Ticker constructor.
```python
import requests_cache
session = requests_cache.CachedSession('yfinance.cache')
session.headers['User-agent'] = 'my-program/1.0'
ticker = yf.Ticker('msft aapl goog', session=session)
# The scraped response will be stored in the cache
ticker.actions
```
To initialize multiple `Ticker` objects, use
```python
import yfinance as yf
tickers = yf.Tickers('msft aapl goog')
# ^ returns a named tuple of Ticker objects
# access each ticker using (example)
tickers.tickers.MSFT.info
tickers.tickers.AAPL.history(period="1mo")
tickers.tickers.GOOG.actions
tickers.tickers['MSFT'].info
tickers.tickers['AAPL'].history(period="1mo")
tickers.tickers['GOOG'].actions
```
### Fetching data for multiple tickers
@@ -168,60 +174,46 @@ import yfinance as yf
data = yf.download("SPY AAPL", start="2017-01-01", end="2017-04-30")
```
I've also added some options to make life easier :)
`yf.download()` and `Ticker.history()` have many options for configuring fetching and processing, e.g.:
```python
data = yf.download( # or pdr.get_data_yahoo(...
# tickers list or string as well
tickers = "SPY AAPL MSFT",
# use "period" instead of start/end
# valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max
# (optional, default is '1mo')
period = "ytd",
# fetch data by interval (including intraday if period < 60 days)
# valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
# (optional, default is '1d')
interval = "1m",
# Whether to ignore timezone when aligning ticker data from
# different timezones. Default is True. False may be useful for
# minute/hourly data.
ignore_tz = False,
# group by ticker (to access via data['SPY'])
# (optional, default is 'column')
group_by = 'ticker',
# adjust all OHLC automatically
# (optional, default is False)
auto_adjust = True,
# download pre/post regular market hours data
# (optional, default is False)
prepost = True,
# use threads for mass downloading? (True/False/Integer)
# (optional, default is True)
threads = True,
# proxy URL scheme to use when downloading?
# (optional, default is None)
proxy = None
)
yf.download(tickers = "SPY AAPL", # list of tickers
period = "1y", # time period
interval = "1d", # trading interval
ignore_tz = True, # ignore timezone when aligning data from different exchanges?
prepost = False) # download pre/post market hours data?
```
### Timezone cache store
Review the [Wiki](https://github.com/ranaroussi/yfinance/wiki) for more options and detail.
### Smarter scraping
To use a custom `requests` session (for example to cache calls to the
API or customize the `User-agent` header), pass a `session=` argument to
the Ticker constructor.
When fetching price data, all dates are localized to stock exchange timezone.
But timezone retrieval is relatively slow, so yfinance attempts to cache them
in your user's cache folder.
You can direct cache to use a different location with `set_tz_cache_location()`:
```python
import yfinance as yf
yf.set_tz_cache_location("custom/cache/location")
...
import requests_cache
session = requests_cache.CachedSession('yfinance.cache')
session.headers['User-agent'] = 'my-program/1.0'
ticker = yf.Ticker('msft', session=session)
# The scraped response will be stored in the cache
ticker.actions
```
Combine a `requests_cache` with rate-limiting to avoid triggering Yahoo's rate-limiter/blocker that can corrupt data.
```python
from requests import Session
from requests_cache import CacheMixin, SQLiteCache
from requests_ratelimiter import LimiterMixin, MemoryQueueBucket
class CachedLimiterSession(CacheMixin, LimiterMixin, Session):
""" """
session = CachedLimiterSession(
per_second=0.9,
bucket_class=MemoryQueueBucket,
backend=SQLiteCache("yfinance.cache"),
)
```
### Managing Multi-Level Columns
@@ -239,6 +231,18 @@ yfinance?](https://stackoverflow.com/questions/63107801)
- How to download single or multiple tickers into a single
dataframe with single level column names and a ticker column
### Timezone cache store
When fetching price data, all dates are localized to stock exchange timezone.
But timezone retrieval is relatively slow, so yfinance attempts to cache them
in your user's cache folder.
You can direct cache to use a different location with `set_tz_cache_location()`:
```python
import yfinance as yf
yf.set_tz_cache_location("custom/cache/location")
...
```
---
## `pandas_datareader` override
@@ -274,12 +278,16 @@ To install `yfinance` using `conda`, see
### Requirements
- [Python](https://www.python.org) \>= 2.7, 3.4+
- [Pandas](https://github.com/pydata/pandas) (tested to work with
\>=0.23.1)
- [Numpy](http://www.numpy.org) \>= 1.11.1
- [requests](http://docs.python-requests.org/en/master/) \>= 2.14.2
- [lxml](https://pypi.org/project/lxml/) \>= 4.5.1
- [appdirs](https://pypi.org/project/appdirs) \>=1.4.4
- [Pandas](https://github.com/pydata/pandas) \>= 1.3.0
- [Numpy](http://www.numpy.org) \>= 1.16.5
- [requests](http://docs.python-requests.org/en/master) \>= 2.26
- [lxml](https://pypi.org/project/lxml) \>= 4.9.1
- [appdirs](https://pypi.org/project/appdirs) \>= 1.4.4
- [pytz](https://pypi.org/project/pytz) \>=2022.5
- [frozendict](https://pypi.org/project/frozendict) \>= 2.3.4
- [beautifulsoup4](https://pypi.org/project/beautifulsoup4) \>= 4.11.1
- [html5lib](https://pypi.org/project/html5lib) \>= 1.1
- [cryptography](https://pypi.org/project/cryptography) \>= 3.3.2
### Optional (if you want to use `pandas_datareader`)

View File

@@ -1,5 +1,5 @@
{% set name = "yfinance" %}
{% set version = "0.1.58" %}
{% set version = "0.2.12" %}
package:
name: "{{ name|lower }}"
@@ -16,22 +16,34 @@ build:
requirements:
host:
- pandas >=0.24.0
- pandas >=1.3.0
- numpy >=1.16.5
- requests >=2.21
- requests >=2.26
- multitasking >=0.0.7
- lxml >=4.5.1
- appdirs >= 1.4.4
- lxml >=4.9.1
- appdirs >=1.4.4
- pytz >=2022.5
- frozendict >=2.3.4
- beautifulsoup4 >=4.11.1
- html5lib >=1.1
# - pycryptodome >=3.6.6
- cryptography >=3.3.2
- pip
- python
run:
- pandas >=0.24.0
- pandas >=1.3.0
- numpy >=1.16.5
- requests >=2.21
- requests >=2.26
- multitasking >=0.0.7
- lxml >=4.5.1
- appdirs >= 1.4.4
- lxml >=4.9.1
- appdirs >=1.4.4
- pytz >=2022.5
- frozendict >=2.3.4
- beautifulsoup4 >=4.11.1
- html5lib >=1.1
# - pycryptodome >=3.6.6
- cryptography >=3.3.2
- python
test:

View File

@@ -1,6 +1,11 @@
pandas>=0.24.0
pandas>=1.3.0
numpy>=1.16.5
requests>=2.26
multitasking>=0.0.7
lxml>=4.5.1
lxml>=4.9.1
appdirs>=1.4.4
pytz>=2022.5
frozendict>=2.3.4
beautifulsoup4>=4.11.1
html5lib>=1.1
cryptography>=3.3.2

View File

@@ -38,8 +38,8 @@ setup(
classifiers=[
'License :: OSI Approved :: Apache Software License',
# 'Development Status :: 3 - Alpha',
# 'Development Status :: 4 - Beta',
'Development Status :: 5 - Production/Stable',
'Development Status :: 4 - Beta',
#'Development Status :: 5 - Production/Stable',
'Operating System :: OS Independent',
@@ -50,20 +50,22 @@ setup(
'Topic :: Software Development :: Libraries',
'Topic :: Software Development :: Libraries :: Python Modules',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
# 'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
],
platforms=['any'],
keywords='pandas, yahoo finance, pandas datareader',
packages=find_packages(exclude=['contrib', 'docs', 'tests', 'examples']),
install_requires=['pandas>=0.24.0', 'numpy>=1.15',
install_requires=['pandas>=1.3.0', 'numpy>=1.16.5',
'requests>=2.26', 'multitasking>=0.0.7',
'lxml>=4.5.1', 'appdirs>=1.4.4'],
'lxml>=4.9.1', 'appdirs>=1.4.4', 'pytz>=2022.5',
'frozendict>=2.3.4',
# 'pycryptodome>=3.6.6',
'cryptography>=3.3.2',
'beautifulsoup4>=4.11.1', 'html5lib>=1.1'],
entry_points={
'console_scripts': [
'sample=sample:main',

View File

@@ -37,23 +37,27 @@ class TestTicker(unittest.TestCase):
ticker.dividends
ticker.splits
ticker.actions
ticker.shares
ticker.info
ticker.calendar
ticker.recommendations
ticker.earnings
ticker.quarterly_earnings
ticker.financials
ticker.quarterly_financials
ticker.income_stmt
ticker.quarterly_income_stmt
ticker.balance_sheet
ticker.quarterly_balance_sheet
ticker.cashflow
ticker.quarterly_cashflow
ticker.recommendations_summary
ticker.analyst_price_target
ticker.revenue_forecasts
ticker.sustainability
ticker.options
ticker.news
ticker.shares
ticker.earnings_history
ticker.earnings_trend
ticker.earnings_dates
ticker.earnings_forecasts
def test_holders(self):
for ticker in tickers:

1
tests/__init__.py Normal file
View File

@@ -0,0 +1 @@
#!/usr/bin/env python

9
tests/context.py Normal file
View File

@@ -0,0 +1,9 @@
# -*- coding: utf-8 -*-
import sys
import os
_parent_dp = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
_src_dp = _parent_dp
sys.path.insert(0, _src_dp)
import yfinance

633
tests/prices.py Normal file
View File

@@ -0,0 +1,633 @@
from .context import yfinance as yf
import unittest
import datetime as _dt
import pytz as _tz
import numpy as _np
import pandas as _pd
import requests_cache
class TestPriceHistory(unittest.TestCase):
session = None
@classmethod
def setUpClass(cls):
cls.session = requests_cache.CachedSession(backend='memory')
@classmethod
def tearDownClass(cls):
if cls.session is not None:
cls.session.close()
def test_daily_index(self):
tkrs = ["BHP.AX", "IMP.JO", "BP.L", "PNL.L", "INTC"]
intervals = ["1d", "1wk", "1mo"]
for tkr in tkrs:
dat = yf.Ticker(tkr, session=self.session)
for interval in intervals:
df = dat.history(period="5y", interval=interval)
f = df.index.time == _dt.time(0)
self.assertTrue(f.all())
def test_duplicatingHourly(self):
tkrs = ["IMP.JO", "BHG.JO", "SSW.JO", "BP.L", "INTC"]
for tkr in tkrs:
dat = yf.Ticker(tkr, session=self.session)
tz = dat._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
dt_utc = _tz.timezone("UTC").localize(_dt.datetime.utcnow())
dt = dt_utc.astimezone(_tz.timezone(tz))
start_d = dt.date() - _dt.timedelta(days=7)
df = dat.history(start=start_d, interval="1h")
dt0 = df.index[-2]
dt1 = df.index[-1]
try:
self.assertNotEqual(dt0.hour, dt1.hour)
except:
print("Ticker = ", tkr)
raise
def test_duplicatingDaily(self):
tkrs = ["IMP.JO", "BHG.JO", "SSW.JO", "BP.L", "INTC"]
test_run = False
for tkr in tkrs:
dat = yf.Ticker(tkr, session=self.session)
tz = dat._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
dt_utc = _tz.timezone("UTC").localize(_dt.datetime.utcnow())
dt = dt_utc.astimezone(_tz.timezone(tz))
if dt.time() < _dt.time(17, 0):
continue
test_run = True
df = dat.history(start=dt.date() - _dt.timedelta(days=7), interval="1d")
dt0 = df.index[-2]
dt1 = df.index[-1]
try:
self.assertNotEqual(dt0, dt1)
except:
print("Ticker = ", tkr)
raise
if not test_run:
self.skipTest("Skipping test_duplicatingDaily() because only expected to fail just after market close")
def test_duplicatingWeekly(self):
tkrs = ['MSFT', 'IWO', 'VFINX', '^GSPC', 'BTC-USD']
test_run = False
for tkr in tkrs:
dat = yf.Ticker(tkr, session=self.session)
tz = dat._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
dt = _tz.timezone(tz).localize(_dt.datetime.now())
if dt.date().weekday() not in [1, 2, 3, 4]:
continue
test_run = True
df = dat.history(start=dt.date() - _dt.timedelta(days=7), interval="1wk")
dt0 = df.index[-2]
dt1 = df.index[-1]
try:
self.assertNotEqual(dt0.week, dt1.week)
except:
print("Ticker={}: Last two rows within same week:".format(tkr))
print(df.iloc[df.shape[0] - 2:])
raise
if not test_run:
self.skipTest("Skipping test_duplicatingWeekly() because not possible to fail Monday/weekend")
def test_intraDayWithEvents(self):
# TASE dividend release pre-market, doesn't merge nicely with intra-day data so check still present
tase_tkrs = ["ICL.TA", "ESLT.TA", "ONE.TA", "MGDL.TA"]
test_run = False
for tkr in tase_tkrs:
start_d = _dt.date.today() - _dt.timedelta(days=59)
end_d = None
df_daily = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1d", actions=True)
df_daily_divs = df_daily["Dividends"][df_daily["Dividends"] != 0]
if df_daily_divs.shape[0] == 0:
# self.skipTest("Skipping test_intraDayWithEvents() because 'ICL.TA' has no dividend in last 60 days")
continue
last_div_date = df_daily_divs.index[-1]
start_d = last_div_date.date()
end_d = last_div_date.date() + _dt.timedelta(days=1)
df = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="15m", actions=True)
self.assertTrue((df["Dividends"] != 0.0).any())
test_run = True
break
if not test_run:
self.skipTest("Skipping test_intraDayWithEvents() because no tickers had a dividend in last 60 days")
def test_dailyWithEvents(self):
# Reproduce issue #521
tkr1 = "QQQ"
tkr2 = "GDX"
start_d = "2014-12-29"
end_d = "2020-11-29"
df1 = yf.Ticker(tkr1).history(start=start_d, end=end_d, interval="1d", actions=True)
df2 = yf.Ticker(tkr2).history(start=start_d, end=end_d, interval="1d", actions=True)
self.assertTrue(((df1["Dividends"] > 0) | (df1["Stock Splits"] > 0)).any())
self.assertTrue(((df2["Dividends"] > 0) | (df2["Stock Splits"] > 0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
missing_from_df1 = df2.index.difference(df1.index)
missing_from_df2 = df1.index.difference(df2.index)
print("{} missing these dates: {}".format(tkr1, missing_from_df1))
print("{} missing these dates: {}".format(tkr2, missing_from_df2))
raise
# Test that index same with and without events:
tkrs = [tkr1, tkr2]
for tkr in tkrs:
df1 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1d", actions=True)
df2 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1d", actions=False)
self.assertTrue(((df1["Dividends"] > 0) | (df1["Stock Splits"] > 0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
missing_from_df1 = df2.index.difference(df1.index)
missing_from_df2 = df1.index.difference(df2.index)
print("{}-with-events missing these dates: {}".format(tkr, missing_from_df1))
print("{}-without-events missing these dates: {}".format(tkr, missing_from_df2))
raise
def test_weeklyWithEvents(self):
# Reproduce issue #521
tkr1 = "QQQ"
tkr2 = "GDX"
start_d = "2014-12-29"
end_d = "2020-11-29"
df1 = yf.Ticker(tkr1).history(start=start_d, end=end_d, interval="1wk", actions=True)
df2 = yf.Ticker(tkr2).history(start=start_d, end=end_d, interval="1wk", actions=True)
self.assertTrue(((df1["Dividends"] > 0) | (df1["Stock Splits"] > 0)).any())
self.assertTrue(((df2["Dividends"] > 0) | (df2["Stock Splits"] > 0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
missing_from_df1 = df2.index.difference(df1.index)
missing_from_df2 = df1.index.difference(df2.index)
print("{} missing these dates: {}".format(tkr1, missing_from_df1))
print("{} missing these dates: {}".format(tkr2, missing_from_df2))
raise
# Test that index same with and without events:
tkrs = [tkr1, tkr2]
for tkr in tkrs:
df1 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1wk", actions=True)
df2 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1wk", actions=False)
self.assertTrue(((df1["Dividends"] > 0) | (df1["Stock Splits"] > 0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
missing_from_df1 = df2.index.difference(df1.index)
missing_from_df2 = df1.index.difference(df2.index)
print("{}-with-events missing these dates: {}".format(tkr, missing_from_df1))
print("{}-without-events missing these dates: {}".format(tkr, missing_from_df2))
raise
def test_monthlyWithEvents(self):
    """Check monthly ("1mo") history indexes agree across tickers and event settings.

    Two tickers fetched over the same range with actions=True must produce
    identical indexes, and each ticker must produce the same index with
    actions=True and actions=False.
    """
    tkr1 = "QQQ"
    tkr2 = "GDX"
    start_d = "2014-12-29"
    end_d = "2020-11-29"

    # Use the shared test session here too, exactly like the per-ticker
    # fetches further down, rather than letting yfinance open its own.
    df1 = yf.Ticker(tkr1, session=self.session).history(start=start_d, end=end_d, interval="1mo", actions=True)
    df2 = yf.Ticker(tkr2, session=self.session).history(start=start_d, end=end_d, interval="1mo", actions=True)

    # The range must contain at least one dividend or split for each ticker.
    self.assertTrue(((df1["Dividends"] > 0) | (df1["Stock Splits"] > 0)).any())
    self.assertTrue(((df2["Dividends"] > 0) | (df2["Stock Splits"] > 0)).any())
    try:
        self.assertTrue(df1.index.equals(df2.index))
    except AssertionError:
        # Print which dates are out of step before re-raising the failure.
        missing_from_df1 = df2.index.difference(df1.index)
        missing_from_df2 = df1.index.difference(df2.index)
        print("{} missing these dates: {}".format(tkr1, missing_from_df1))
        print("{} missing these dates: {}".format(tkr2, missing_from_df2))
        raise

    # Test that index same with and without events:
    for tkr in (tkr1, tkr2):
        df_events = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1mo", actions=True)
        df_plain = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1mo", actions=False)
        self.assertTrue(((df_events["Dividends"] > 0) | (df_events["Stock Splits"] > 0)).any())
        try:
            self.assertTrue(df_events.index.equals(df_plain.index))
        except AssertionError:
            missing_from_events = df_plain.index.difference(df_events.index)
            missing_from_plain = df_events.index.difference(df_plain.index)
            print("{}-with-events missing these dates: {}".format(tkr, missing_from_events))
            print("{}-without-events missing these dates: {}".format(tkr, missing_from_plain))
            raise
def test_tz_dst_ambiguous(self):
    """Daily history for ESLT.TA over 2002-10-06..09 must not raise AmbiguousTimeError.

    Reproduces issue #1100.
    """
    try:
        yf.Ticker("ESLT.TA", session=self.session).history(start="2002-10-06", end="2002-10-09", interval="1d")
    except _tz.exceptions.AmbiguousTimeError:
        # Report a regression as a test *failure* (AssertionError) instead of
        # an *error* caused by a generic Exception.
        self.fail("Ambiguous DST issue not resolved")
def test_dst_fix(self):
    """Check daily rows never fall on a weekend and weekly rows start on Monday.

    Daily intervals should start at time 00:00. But for some combinations of
    date and timezone, Yahoo has time off by few hours (e.g. Brazil 23:00
    around Jan-2022). Suspect DST problem.
    The clue is (a) minutes=0 and (b) hour near 0.
    Obviously Yahoo meant 00:00, so ensure this doesn't affect date conversion.
    The correction is successful if no days are weekend, and weekly data
    begins Monday.
    """
    tkr = "AGRO3.SA"
    dat = yf.Ticker(tkr, session=self.session)
    start = "2021-01-11"
    end = "2022-11-05"

    interval = "1d"
    df = dat.history(start=start, end=end, interval=interval)
    # weekday: Monday=0 ... Friday=4
    self.assertTrue(((df.index.weekday >= 0) & (df.index.weekday <= 4)).all())

    interval = "1wk"
    df = dat.history(start=start, end=end, interval=interval)
    try:
        self.assertTrue((df.index.weekday == 0).all())
    except AssertionError:
        # Only an assertion failure means misalignment; let anything else
        # (e.g. KeyboardInterrupt) propagate without the misleading message.
        print("Weekly data not aligned to Monday")
        raise
def test_prune_post_intraday_us(self):
    """Check hourly AMZN data is cut off correctly on the 2022-11-25 US half-day.

    Yahoo normally returns an interval starting when regular trading
    closes, even if prepost=False. The special day must end before the
    early close, and it must be the only day of its year that closes early;
    no day may open late.
    """
    # Setup
    symbol = "AMZN"
    interval = "1h"
    one_interval = _dt.timedelta(hours=1)
    regular_open = _dt.time(9, 30)
    regular_close = _dt.time(16)
    special_day = _dt.date(2022, 11, 25)
    early_close = _dt.time(13)
    dat = yf.Ticker(symbol, session=self.session)

    # Run: a window of one week either side of the special day
    one_week = _dt.timedelta(days=7)
    df = dat.history(start=special_day - one_week, end=special_day + one_week,
                     interval=interval, prepost=False, keepna=True)
    last_on_special_day = df.loc[str(special_day)].index[-1]
    self.assertTrue(last_on_special_day.time() < early_close)

    # Test no other afternoons (or mornings) were pruned
    year = special_day.year
    df = dat.history(start=_dt.date(year, 1, 1), end=_dt.date(year + 1, 1, 1),
                     interval="1h", prepost=False, keepna=True)

    # Last timestamp of each calendar day; a day closed early if that last
    # interval ends before the regular close.
    last_per_day = _pd.Series(df.index).groupby(df.index.date).last()
    closed_early = (last_per_day + one_interval).dt.time < regular_close
    early_close_dates = last_per_day.index[closed_early].values
    self.assertEqual(len(early_close_dates), 1)
    self.assertEqual(early_close_dates[0], special_day)

    # First timestamp of each calendar day; none may start after the open.
    first_per_day = _pd.Series(df.index).groupby(df.index.date).first()
    opened_late = first_per_day.dt.time > regular_open
    late_open_dates = first_per_day.index[opened_late]
    self.assertEqual(len(late_open_dates), 0)
def test_prune_post_intraday_omx(self):
    """Check hourly AEC.ST data is cut off correctly on Swedish half trading days.

    Yahoo normally returns an interval starting when regular trading
    closes, even if prepost=False.
    If prepost=False, test that yfinance is removing prepost intervals.
    """
    # Setup
    symbol = "AEC.ST"
    interval = "1h"
    one_interval = _dt.timedelta(hours=1)
    regular_open = _dt.time(9)
    regular_close = _dt.time(17, 30)
    special_day = _dt.date(2022, 12, 23)
    early_close = _dt.time(13, 2)
    dat = yf.Ticker(symbol, session=self.session)
    year = special_day.year

    # Half trading day Jan 5, Apr 14, May 25, Jun 23, Nov 4, Dec 23, Dec 30
    half_day_month_days = [(1, 5), (4, 14), (5, 25), (6, 23), (11, 4), (12, 23), (12, 30)]
    half_days = [_dt.date(year, month, day) for month, day in half_day_month_days]
    # Yahoo has incorrectly classified afternoon of 2022-04-13 as post-market.
    # Nothing yfinance can do because Yahoo doesn't return data with prepost=False.
    # But need to handle in this test.
    expected_incorrect_half_days = [_dt.date(2022, 4, 13)]
    half_days = sorted(half_days + expected_incorrect_half_days)

    # Run: a window of one week either side of the special day
    one_week = _dt.timedelta(days=7)
    df = dat.history(start=special_day - one_week, end=special_day + one_week,
                     interval=interval, prepost=False, keepna=True)
    last_on_special_day = df.loc[str(special_day)].index[-1]
    self.assertTrue(last_on_special_day.time() < early_close)

    # Test no other afternoons (or mornings) were pruned
    df = dat.history(start=_dt.date(year, 1, 1), end=_dt.date(year + 1, 1, 1),
                     interval="1h", prepost=False, keepna=True)

    # A day closed early if its last interval ends before the regular close.
    last_per_day = _pd.Series(df.index).groupby(df.index.date).last()
    closed_early = (last_per_day + one_interval).dt.time < regular_close
    early_close_dates = last_per_day.index[closed_early].values
    unexpected_early_close_dates = [d for d in early_close_dates if d not in half_days]
    self.assertEqual(len(unexpected_early_close_dates), 0)
    self.assertEqual(len(early_close_dates), len(half_days))
    self.assertTrue(_np.equal(early_close_dates, half_days).all())

    # No day may start after the regular open.
    first_per_day = _pd.Series(df.index).groupby(df.index.date).first()
    opened_late = first_per_day.dt.time > regular_open
    late_open_dates = first_per_day.index[opened_late]
    self.assertEqual(len(late_open_dates), 0)
def test_prune_post_intraday_asx(self):
    """Check that no 2022 trading day of hourly BHP.AX data opens late or closes early."""
    # Setup
    symbol = "BHP.AX"
    interval = "1h"
    one_interval = _dt.timedelta(hours=1)
    regular_open = _dt.time(10)
    regular_close = _dt.time(16, 12)
    # No early closes in 2022
    dat = yf.Ticker(symbol, session=self.session)

    # Test no afternoons (or mornings) were pruned
    year = 2022
    df = dat.history(start=_dt.date(year, 1, 1), end=_dt.date(year + 1, 1, 1),
                     interval=interval, prepost=False, keepna=True)

    # A day closed early if its last interval ends before the regular close.
    last_per_day = _pd.Series(df.index).groupby(df.index.date).last()
    closed_early = (last_per_day + one_interval).dt.time < regular_close
    early_close_dates = last_per_day.index[closed_early].values
    self.assertEqual(len(early_close_dates), 0)

    # No day may start after the regular open.
    first_per_day = _pd.Series(df.index).groupby(df.index.date).first()
    opened_late = first_per_day.dt.time > regular_open
    late_open_dates = first_per_day.index[opened_late]
    self.assertEqual(len(late_open_dates), 0)
def test_weekly_2rows_fix(self):
    """Check recent weekly AMZN rows, fetched from a Monday start, all fall on Mondays."""
    symbol = "AMZN"
    two_weeks_ago = _dt.date.today() - _dt.timedelta(days=14)
    # Step back to the Monday of that week (weekday() is 0 for Monday).
    monday = two_weeks_ago - _dt.timedelta(days=two_weeks_ago.weekday())

    weekly = yf.Ticker(symbol).history(start=monday, interval="1wk")
    self.assertTrue((weekly.index.weekday == 0).all())
class TestPriceRepair(unittest.TestCase):
    """Tests for the price-repair paths of yf.Ticker.

    Covers history(..., repair=True) and the private helpers
    _fix_unit_mixups() and _fix_zeroes(). All tickers share one in-memory
    cached session for the lifetime of the class.
    """
    session = None

    @classmethod
    def setUpClass(cls):
        cls.session = requests_cache.CachedSession(backend='memory')

    @classmethod
    def tearDownClass(cls):
        if cls.session is not None:
            cls.session.close()

    def _assert_columns_close(self, repaired_df, correct_df, columns, rtol):
        """Assert each of `columns` matches between the two frames within `rtol`.

        On a mismatch, prints the offending column from both frames and
        their difference, then re-raises the assertion failure.
        """
        for c in columns:
            try:
                self.assertTrue(_np.isclose(repaired_df[c], correct_df[c], rtol=rtol).all())
            except AssertionError:
                print("Mismatch in column", c)
                print("- repaired:")
                print(repaired_df[c])
                print("- correct:")
                print(correct_df[c])
                print("- diff:")
                print(repaired_df[c] - correct_df[c])
                raise

    def _assert_ratios_1x_or_100x(self, df_bad, df, data_cols):
        """Assert every bad/good price ratio in `data_cols` is either ~1x or ~100x."""
        ratio = df_bad[data_cols].values / df[data_cols].values
        ratio = ratio.round(2)
        # - round near-100 ratio to 100:
        f = ratio > 90
        ratio[f] = (ratio[f] / 10).round().astype(int) * 10  # round ratio to nearest 10
        # - now test
        f_100 = ratio == 100
        f_1 = ratio == 1
        self.assertTrue((f_100 | f_1).all())

    def test_reconstruct_2m(self):
        """Smoke test: repairing 60 days of 2m data must not raise.

        2m repair requires 1m data.
        Yahoo restricts 1m fetches to 7 days max within last 30 days.
        Need to test that '_reconstruct_intervals_batch()' can handle this.
        """
        tkrs = ["BHP.AX", "IMP.JO", "BP.L", "PNL.L", "INTC"]

        # Round time for 'requests_cache' reuse
        end_dt = _pd.Timestamp.utcnow().ceil("1h")
        start_dt = end_dt - _dt.timedelta(days=60)

        for tkr in tkrs:
            dat = yf.Ticker(tkr, session=self.session)
            # The result is not inspected; the call completing is the test.
            dat.history(start=start_dt, end=end_dt, interval="2m", repair=True)

    def test_repair_100x_weekly(self):
        """_fix_unit_mixups() must undo 100x errors injected into weekly prices."""
        # Setup:
        tkr = "PNL.L"
        dat = yf.Ticker(tkr, session=self.session)
        tz_exchange = dat.fast_info["timezone"]

        data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
        df = _pd.DataFrame(data={"Open": [470.5, 473.5, 474.5, 470],
                                 "High": [476, 476.5, 477, 480],
                                 "Low": [470.5, 470, 465.5, 468.26],
                                 "Close": [475, 473.5, 472, 473.5],
                                 "Adj Close": [475, 473.5, 472, 473.5],
                                 "Volume": [2295613, 2245604, 3000287, 2635611]},
                           index=_pd.to_datetime([_dt.date(2022, 10, 24),
                                                  _dt.date(2022, 10, 17),
                                                  _dt.date(2022, 10, 10),
                                                  _dt.date(2022, 10, 3)]))
        df = df.sort_index()
        df.index.name = "Date"
        # Inject the 100x errors while the index is still tz-naive
        df_bad = df.copy()
        df_bad.loc["2022-10-24", "Close"] *= 100
        df_bad.loc["2022-10-17", "Low"] *= 100
        df_bad.loc["2022-10-03", "Open"] *= 100
        df.index = df.index.tz_localize(tz_exchange)
        df_bad.index = df_bad.index.tz_localize(tz_exchange)

        # Run test
        df_repaired = dat._fix_unit_mixups(df_bad, "1wk", tz_exchange, prepost=False)

        # First test - no errors left
        self._assert_columns_close(df_repaired, df, data_cols, rtol=1e-2)

        # Second test - all differences should be either ~1x or ~100x
        self._assert_ratios_1x_or_100x(df_bad, df, data_cols)

    def test_repair_100x_weekly_preSplit(self):
        """_fix_unit_mixups() must undo 100x errors in weekly data lacking split adjustment.

        PNL.L has a stock-split in 2022. Sometimes requesting data before 2022
        is not split-adjusted.
        """
        tkr = "PNL.L"
        dat = yf.Ticker(tkr, session=self.session)
        tz_exchange = dat.fast_info["timezone"]

        data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
        df = _pd.DataFrame(data={"Open": [400, 398, 392.5, 417],
                                 "High": [421, 425, 419, 420.5],
                                 "Low": [400, 380.5, 376.5, 396],
                                 "Close": [410, 409.5, 402, 399],
                                 "Adj Close": [398.02, 397.53, 390.25, 387.34],
                                 "Volume": [3232600, 3773900, 10835000, 4257900]},
                           index=_pd.to_datetime([_dt.date(2020, 3, 30),
                                                  _dt.date(2020, 3, 23),
                                                  _dt.date(2020, 3, 16),
                                                  _dt.date(2020, 3, 9)]))
        df = df.sort_index()
        # Simulate data missing split-adjustment:
        df[data_cols] *= 100.0
        df["Volume"] *= 0.01
        #
        df.index.name = "Date"
        # Create 100x errors:
        df_bad = df.copy()
        df_bad.loc["2020-03-30", "Close"] *= 100
        df_bad.loc["2020-03-23", "Low"] *= 100
        df_bad.loc["2020-03-09", "Open"] *= 100
        df.index = df.index.tz_localize(tz_exchange)
        df_bad.index = df_bad.index.tz_localize(tz_exchange)

        df_repaired = dat._fix_unit_mixups(df_bad, "1wk", tz_exchange, prepost=False)

        # First test - no errors left
        self._assert_columns_close(df_repaired, df, data_cols, rtol=1e-2)

        # Second test - all differences should be either ~1x or ~100x
        self._assert_ratios_1x_or_100x(df_bad, df, data_cols)

    def test_repair_100x_daily(self):
        """_fix_unit_mixups() must undo 100x errors injected into daily prices."""
        tkr = "PNL.L"
        dat = yf.Ticker(tkr, session=self.session)
        tz_exchange = dat.fast_info["timezone"]

        data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
        df = _pd.DataFrame(data={"Open": [478, 476, 476, 472],
                                 "High": [478, 477.5, 477, 475],
                                 "Low": [474.02, 474, 473, 470.75],
                                 "Close": [475.5, 475.5, 474.5, 475],
                                 "Adj Close": [475.5, 475.5, 474.5, 475],
                                 "Volume": [436414, 485947, 358067, 287620]},
                           index=_pd.to_datetime([_dt.date(2022, 11, 1),
                                                  _dt.date(2022, 10, 31),
                                                  _dt.date(2022, 10, 28),
                                                  _dt.date(2022, 10, 27)]))
        df = df.sort_index()
        df.index.name = "Date"
        # Inject the 100x errors while the index is still tz-naive
        df_bad = df.copy()
        df_bad.loc["2022-11-01", "Close"] *= 100
        df_bad.loc["2022-10-31", "Low"] *= 100
        df_bad.loc["2022-10-27", "Open"] *= 100
        df.index = df.index.tz_localize(tz_exchange)
        df_bad.index = df_bad.index.tz_localize(tz_exchange)

        df_repaired = dat._fix_unit_mixups(df_bad, "1d", tz_exchange, prepost=False)

        # First test - no errors left
        self._assert_columns_close(df_repaired, df, data_cols, rtol=1e-2)

        # Second test - all differences should be either ~1x or ~100x
        self._assert_ratios_1x_or_100x(df_bad, df, data_cols)

    def test_repair_zeroes_daily(self):
        """_fix_zeroes() must fill zero Open/High/Low on a daily row with the expected prices."""
        tkr = "BBIL.L"
        dat = yf.Ticker(tkr, session=self.session)
        tz_exchange = dat.fast_info["timezone"]

        df_bad = _pd.DataFrame(data={"Open": [0, 102.04, 102.04],
                                     "High": [0, 102.1, 102.11],
                                     "Low": [0, 102.04, 102.04],
                                     "Close": [103.03, 102.05, 102.08],
                                     "Adj Close": [102.03, 102.05, 102.08],
                                     "Volume": [560, 137, 117]},
                               index=_pd.to_datetime([_dt.datetime(2022, 11, 1),
                                                      _dt.datetime(2022, 10, 31),
                                                      _dt.datetime(2022, 10, 30)]))
        df_bad = df_bad.sort_index()
        df_bad.index.name = "Date"
        df_bad.index = df_bad.index.tz_localize(tz_exchange)

        repaired_df = dat._fix_zeroes(df_bad, "1d", tz_exchange, prepost=False)

        # Expected values for the row whose Open/High/Low were zero
        correct_df = df_bad.copy()
        correct_df.loc["2022-11-01", "Open"] = 102.080002
        correct_df.loc["2022-11-01", "Low"] = 102.032501
        correct_df.loc["2022-11-01", "High"] = 102.080002
        self._assert_columns_close(repaired_df, correct_df, ["Open", "Low", "High", "Close"], rtol=1e-8)

    def test_repair_zeroes_hourly(self):
        """_fix_zeroes() must restore one blanked-out hourly row to the fetched values."""
        tkr = "INTC"
        dat = yf.Ticker(tkr, session=self.session)
        tz_exchange = dat.fast_info["timezone"]

        correct_df = dat.history(period="1wk", interval="1h", auto_adjust=False, repair=True)

        # Blank out one row: NaN prices and zero volume
        df_bad = correct_df.copy()
        bad_idx = correct_df.index[10]
        df_bad.loc[bad_idx, "Open"] = _np.nan
        df_bad.loc[bad_idx, "High"] = _np.nan
        df_bad.loc[bad_idx, "Low"] = _np.nan
        df_bad.loc[bad_idx, "Close"] = _np.nan
        df_bad.loc[bad_idx, "Adj Close"] = _np.nan
        df_bad.loc[bad_idx, "Volume"] = 0

        repaired_df = dat._fix_zeroes(df_bad, "1h", tz_exchange, prepost=False)

        self._assert_columns_close(repaired_df, correct_df, ["Open", "Low", "High", "Close"], rtol=1e-7)
if __name__ == "__main__":
    # Default entry point when this file is executed directly.
    unittest.main()

    # Alternative kept for reference - run tests sequentially (in source
    # order) by replacing the call above with:
    #
    #     import inspect
    #     test_src = inspect.getsource(TestPriceHistory)
    #     unittest.TestLoader.sortTestMethodsUsing = lambda _, x, y: (
    #         test_src.index(f"def {x}") - test_src.index(f"def {y}")
    #     )
    #     unittest.main(verbosity=2)

819
tests/ticker.py Normal file
View File

@@ -0,0 +1,819 @@
"""
Tests for Ticker
To run all tests in suite from commandline:
python -m unittest tests.ticker
Specific test class:
python -m unittest tests.ticker.TestTicker
"""
import pandas as pd
import numpy as np
from .context import yfinance as yf
import unittest
import requests_cache
# Debug switch: set to True to see the exact requests that are made during
# tests (turns on DEBUG-level logging for the whole process).
DEBUG_LOG_REQUESTS = False

if DEBUG_LOG_REQUESTS:
    import logging

    logging.basicConfig(level=logging.DEBUG)
class TestTicker(unittest.TestCase):
    """Smoke tests for yf.Ticker: timezone lookup and walking the whole public API.

    All tickers share one in-memory cached session for the lifetime of the class.
    """
    session = None

    @classmethod
    def setUpClass(cls):
        cls.session = requests_cache.CachedSession(backend='memory')

    @classmethod
    def tearDownClass(cls):
        if cls.session is not None:
            cls.session.close()

    def _exercise_full_api(self, tkr):
        """Touch every history call, fast_info key and data property on one Ticker.

        Nothing is asserted; the check is that none of the accesses raises
        when they are all made on the same Ticker instance.
        """
        dat = yf.Ticker(tkr, session=self.session)

        dat.history(period="1wk")
        dat.history(start="2022-01-01")
        dat.history(start="2022-01-01", end="2022-03-01")
        yf.download([tkr], period="1wk")

        for k in dat.fast_info:
            dat.fast_info[k]

        # Properties are read purely for their side effect (fetch + parse).
        for name in ("isin", "major_holders", "institutional_holders",
                     "mutualfund_holders", "dividends", "splits", "actions",
                     "shares"):
            getattr(dat, name)
        dat.get_shares_full()
        for name in ("info", "calendar", "recommendations", "earnings",
                     "quarterly_earnings", "income_stmt",
                     "quarterly_income_stmt", "balance_sheet",
                     "quarterly_balance_sheet", "cashflow",
                     "quarterly_cashflow", "recommendations_summary",
                     "analyst_price_target", "revenue_forecasts",
                     "sustainability", "options", "news", "earnings_trend",
                     "earnings_dates", "earnings_forecasts"):
            getattr(dat, name)

    def test_getTz(self):
        """_get_ticker_tz() must return a timezone even when the tz-cache entry is cleared."""
        tkrs = ["IMP.JO", "BHG.JO", "SSW.JO", "BP.L", "INTC"]
        for tkr in tkrs:
            # First step: remove ticker from tz-cache
            yf.utils.get_tz_cache().store(tkr, None)

            # Test:
            dat = yf.Ticker(tkr, session=self.session)
            tz = dat._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)

            self.assertIsNotNone(tz)

    def test_badTicker(self):
        """Check yfinance doesn't die when ticker delisted."""
        tkr = "DJI"  # typo of "^DJI"
        self._exercise_full_api(tkr)

    def test_goodTicker(self):
        """Check yfinance works when full api is called on same instance of ticker."""
        tkrs = [
            "IBM",
            "QCSTIX",  # weird ticker, no price history but has previous close
        ]
        for tkr in tkrs:
            self._exercise_full_api(tkr)
class TestTickerHistory(unittest.TestCase):
    """Tests for price history, history metadata and corporate actions of one Ticker.

    Each test gets a fresh IBM Ticker; all of them share one in-memory
    cached session for the lifetime of the class.
    """
    session = None

    @classmethod
    def setUpClass(cls):
        cls.session = requests_cache.CachedSession(backend='memory')

    @classmethod
    def tearDownClass(cls):
        if cls.session is not None:
            cls.session.close()

    def setUp(self):
        # use a ticker that has dividends
        self.ticker = yf.Ticker("IBM", session=self.session)

    def tearDown(self):
        self.ticker = None

    def test_history(self):
        """history_metadata raises before history() is called and is populated afterwards."""
        with self.assertRaises(RuntimeError):
            self.ticker.history_metadata

        data = self.ticker.history("1y")
        self.assertIn("IBM", self.ticker.history_metadata.values(), "metadata missing")
        self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
        self.assertFalse(data.empty, "data is empty")

    def test_no_expensive_calls_introduced(self):
        """
        Make sure calling history to get price data has not introduced more calls to yahoo than absolutely necessary.
        As doing other type of scraping calls than "query2.finance.yahoo.com/v8/finance/chart" to yahoo website
        will quickly trigger spam-block when doing bulk download of history data.
        """
        # A private session, so its cache holds only this test's requests.
        # The context manager closes it even if history() raises.
        with requests_cache.CachedSession(backend='memory') as session:
            ticker = yf.Ticker("GOOGL", session=session)
            ticker.history("1y")
            actual_urls_called = tuple(r.url for r in session.cache.filter())

        expected_urls = (
            'https://query2.finance.yahoo.com/v8/finance/chart/GOOGL?range=1y&interval=1d&includePrePost=False&events=div%2Csplits%2CcapitalGains',
        )
        self.assertEqual(expected_urls, actual_urls_called, "Different than expected url used to fetch history.")

    def test_dividends(self):
        """dividends is a non-empty Series."""
        data = self.ticker.dividends
        self.assertIsInstance(data, pd.Series, "data has wrong type")
        self.assertFalse(data.empty, "data is empty")

    def test_splits(self):
        """splits is a Series; emptiness is deliberately not asserted."""
        data = self.ticker.splits
        self.assertIsInstance(data, pd.Series, "data has wrong type")
        # self.assertFalse(data.empty, "data is empty")

    def test_actions(self):
        """actions is a non-empty DataFrame."""
        data = self.ticker.actions
        self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
        self.assertFalse(data.empty, "data is empty")
class TestTickerEarnings(unittest.TestCase):
    """Tests that the earnings-related Ticker data are non-empty DataFrames and cached.

    Each test gets a fresh GOOGL Ticker; all of them share one in-memory
    cached session for the lifetime of the class.
    """
    session = None

    @classmethod
    def setUpClass(cls):
        cls.session = requests_cache.CachedSession(backend='memory')

    @classmethod
    def tearDownClass(cls):
        if cls.session is not None:
            cls.session.close()

    def setUp(self):
        self.ticker = yf.Ticker("GOOGL", session=self.session)

    def tearDown(self):
        self.ticker = None

    def _assert_cached_dataframe(self, fetch):
        """Call `fetch` twice; expect a non-empty DataFrame and the very same object back."""
        data = fetch()
        self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
        self.assertFalse(data.empty, "data is empty")

        data_cached = fetch()
        self.assertIs(data, data_cached, "data not cached")

    def test_earnings(self):
        self._assert_cached_dataframe(lambda: self.ticker.earnings)

    def test_quarterly_earnings(self):
        self._assert_cached_dataframe(lambda: self.ticker.quarterly_earnings)

    def test_earnings_forecasts(self):
        self._assert_cached_dataframe(lambda: self.ticker.earnings_forecasts)

    def test_earnings_dates(self):
        self._assert_cached_dataframe(lambda: self.ticker.earnings_dates)

    def test_earnings_trend(self):
        self._assert_cached_dataframe(lambda: self.ticker.earnings_trend)

    def test_earnings_dates_with_limit(self):
        """get_earnings_dates(limit=N) returns exactly N rows and caches the result."""
        # use ticker with lots of historic earnings
        ticker = yf.Ticker("IBM", session=self.session)
        limit = 110
        data = ticker.get_earnings_dates(limit=limit)
        self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
        self.assertFalse(data.empty, "data is empty")
        self.assertEqual(len(data), limit, "Wrong number of rows")

        data_cached = ticker.get_earnings_dates(limit=limit)
        self.assertIs(data, data_cached, "data not cached")
class TestTickerHolders(unittest.TestCase):
    """Tests that the holders tables of a Ticker are non-empty DataFrames and cached.

    Each test gets a fresh GOOGL Ticker; all of them share one in-memory
    cached session for the lifetime of the class.
    """
    session = None

    @classmethod
    def setUpClass(cls):
        cls.session = requests_cache.CachedSession(backend='memory')

    @classmethod
    def tearDownClass(cls):
        if cls.session is None:
            return
        cls.session.close()

    def setUp(self):
        self.ticker = yf.Ticker("GOOGL", session=self.session)

    def tearDown(self):
        self.ticker = None

    def _check_holders_table(self, attribute):
        """Read `attribute` from the ticker twice and verify type, content and caching."""
        first_read = getattr(self.ticker, attribute)
        self.assertIsInstance(first_read, pd.DataFrame, "data has wrong type")
        self.assertFalse(first_read.empty, "data is empty")

        second_read = getattr(self.ticker, attribute)
        self.assertIs(first_read, second_read, "data not cached")

    def test_major_holders(self):
        self._check_holders_table("major_holders")

    def test_institutional_holders(self):
        self._check_holders_table("institutional_holders")

    def test_mutualfund_holders(self):
        self._check_holders_table("mutualfund_holders")
class TestTickerMiscFinancials(unittest.TestCase):
session = None
@classmethod
def setUpClass(cls):
    """Open one in-memory cached session shared by every test in the class."""
    shared_session = requests_cache.CachedSession(backend='memory')
    cls.session = shared_session
@classmethod
def tearDownClass(cls):
    """Close the shared session, if one was opened."""
    if cls.session is None:
        return
    cls.session.close()
def setUp(self):
    """Create the two Ticker objects used by the tests, both on the shared session."""
    shared = self.session
    self.ticker = yf.Ticker("GOOGL", session=shared)

    # For ticker 'BSE.AX' (and others), Yahoo not returning
    # full quarterly financials (usually cash-flow) with all entries,
    # instead returns a smaller version in different data store.
    self.ticker_old_fmt = yf.Ticker("BSE.AX", session=shared)
def tearDown(self):
    """Drop both Ticker objects created in setUp so no state survives between tests."""
    self.ticker = None
    # setUp also creates this one; release it as well.
    self.ticker_old_fmt = None
def test_income_statement(self):
    """Annual income statement: expected rows, roughly yearly columns, property default, dict form."""
    pretty_rows = ["Total Revenue", "Basic EPS"]
    raw_rows = [row.replace(' ', '') for row in pretty_rows]
    days_per_period = 365

    # Test contents of table
    table = self.ticker.get_income_stmt(pretty=True)
    self.assertIsInstance(table, pd.DataFrame, "data has wrong type")
    self.assertFalse(table.empty, "data is empty")
    for row in pretty_rows:
        self.assertIn(row, table.index, "Did not find expected row in index")
    # Gap between the first two column dates must be within 20 days of a year
    gap_days = abs((table.columns[0] - table.columns[1]).days)
    self.assertLess(abs(gap_days - days_per_period), 20, "Not returning annual financials")

    # Test property defaults
    via_property = self.ticker.income_stmt
    self.assertTrue(table.equals(via_property), "property not defaulting to 'pretty=True'")

    # Test pretty=False
    raw_table = self.ticker.get_income_stmt(pretty=False)
    self.assertIsInstance(raw_table, pd.DataFrame, "data has wrong type")
    self.assertFalse(raw_table.empty, "data is empty")
    for row in raw_rows:
        self.assertIn(row, raw_table.index, "Did not find expected row in index")

    # Test to_dict
    as_mapping = self.ticker.get_income_stmt(as_dict=True)
    self.assertIsInstance(as_mapping, dict, "data has wrong type")
def test_quarterly_income_statement(self):
    """Quarterly income statement: expected rows, roughly quarterly columns, property default, dict form."""
    expected_keys = ["Total Revenue", "Basic EPS"]
    expected_periods_days = 365//4

    # Test contents of table
    data = self.ticker.get_income_stmt(pretty=True, freq="quarterly")
    self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
    self.assertFalse(data.empty, "data is empty")
    for k in expected_keys:
        self.assertIn(k, data.index, "Did not find expected row in index")
    # Gap between the first two column dates must be within 20 days of a quarter
    period = abs((data.columns[0]-data.columns[1]).days)
    self.assertLess(abs(period-expected_periods_days), 20, "Not returning quarterly financials")

    # Test property defaults
    data2 = self.ticker.quarterly_income_stmt
    self.assertTrue(data.equals(data2), "property not defaulting to 'pretty=True'")

    # Test pretty=False
    expected_keys = [k.replace(' ', '') for k in expected_keys]
    data = self.ticker.get_income_stmt(pretty=False, freq="quarterly")
    self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
    self.assertFalse(data.empty, "data is empty")
    for k in expected_keys:
        self.assertIn(k, data.index, "Did not find expected row in index")

    # Test to_dict - request the quarterly statement, so this step checks the
    # same data as the rest of the test instead of the annual default.
    data = self.ticker.get_income_stmt(as_dict=True, freq="quarterly")
    self.assertIsInstance(data, dict, "data has wrong type")
def test_quarterly_income_statement_old_fmt(self):
    """Legacy quarterly income statement is a non-empty, cached DataFrame with a TotalRevenue row."""
    old_ticker = self.ticker_old_fmt

    first_read = old_ticker.get_income_stmt(freq="quarterly", legacy=True)
    self.assertIsInstance(first_read, pd.DataFrame, "data has wrong type")
    self.assertFalse(first_read.empty, "data is empty")
    self.assertIn("TotalRevenue", first_read.index, "Did not find expected row in index")

    second_read = old_ticker.get_income_stmt(freq="quarterly", legacy=True)
    self.assertIs(first_read, second_read, "data not cached")
def test_balance_sheet(self):
    """Annual balance sheet: expected rows, roughly yearly columns, property default, dict form."""
    pretty_rows = ["Total Assets", "Net PPE"]
    raw_rows = [row.replace(' ', '') for row in pretty_rows]
    days_per_period = 365

    # Test contents of table
    table = self.ticker.get_balance_sheet(pretty=True)
    self.assertIsInstance(table, pd.DataFrame, "data has wrong type")
    self.assertFalse(table.empty, "data is empty")
    for row in pretty_rows:
        self.assertIn(row, table.index, "Did not find expected row in index")
    # Gap between the first two column dates must be within 20 days of a year
    gap_days = abs((table.columns[0] - table.columns[1]).days)
    self.assertLess(abs(gap_days - days_per_period), 20, "Not returning annual financials")

    # Test property defaults
    via_property = self.ticker.balance_sheet
    self.assertTrue(table.equals(via_property), "property not defaulting to 'pretty=True'")

    # Test pretty=False
    raw_table = self.ticker.get_balance_sheet(pretty=False)
    self.assertIsInstance(raw_table, pd.DataFrame, "data has wrong type")
    self.assertFalse(raw_table.empty, "data is empty")
    for row in raw_rows:
        self.assertIn(row, raw_table.index, "Did not find expected row in index")

    # Test to_dict
    as_mapping = self.ticker.get_balance_sheet(as_dict=True)
    self.assertIsInstance(as_mapping, dict, "data has wrong type")
def test_quarterly_balance_sheet(self):
    """Quarterly balance sheet: expected rows, roughly quarterly columns, property default, dict form."""
    pretty_rows = ["Total Assets", "Net PPE"]
    raw_rows = [row.replace(' ', '') for row in pretty_rows]
    days_per_period = 365 // 4

    # Test contents of table
    table = self.ticker.get_balance_sheet(pretty=True, freq="quarterly")
    self.assertIsInstance(table, pd.DataFrame, "data has wrong type")
    self.assertFalse(table.empty, "data is empty")
    for row in pretty_rows:
        self.assertIn(row, table.index, "Did not find expected row in index")
    # Gap between the first two column dates must be within 20 days of a quarter
    gap_days = abs((table.columns[0] - table.columns[1]).days)
    self.assertLess(abs(gap_days - days_per_period), 20, "Not returning quarterly financials")

    # Test property defaults
    via_property = self.ticker.quarterly_balance_sheet
    self.assertTrue(table.equals(via_property), "property not defaulting to 'pretty=True'")

    # Test pretty=False
    raw_table = self.ticker.get_balance_sheet(pretty=False, freq="quarterly")
    self.assertIsInstance(raw_table, pd.DataFrame, "data has wrong type")
    self.assertFalse(raw_table.empty, "data is empty")
    for row in raw_rows:
        self.assertIn(row, raw_table.index, "Did not find expected row in index")

    # Test to_dict
    as_mapping = self.ticker.get_balance_sheet(as_dict=True, freq="quarterly")
    self.assertIsInstance(as_mapping, dict, "data has wrong type")
def test_quarterly_balance_sheet_old_fmt(self):
    """Legacy quarterly balance sheet is a non-empty, cached DataFrame with a TotalAssets row."""
    old_ticker = self.ticker_old_fmt

    first_read = old_ticker.get_balance_sheet(freq="quarterly", legacy=True)
    self.assertIsInstance(first_read, pd.DataFrame, "data has wrong type")
    self.assertFalse(first_read.empty, "data is empty")
    self.assertIn("TotalAssets", first_read.index, "Did not find expected row in index")

    second_read = old_ticker.get_balance_sheet(freq="quarterly", legacy=True)
    self.assertIs(first_read, second_read, "data not cached")
def test_cash_flow(self):
    """Annual cash flow: expected rows, roughly yearly columns, property default, dict form."""
    pretty_rows = ["Operating Cash Flow", "Net PPE Purchase And Sale"]
    raw_rows = [row.replace(' ', '') for row in pretty_rows]
    days_per_period = 365

    # Test contents of table
    table = self.ticker.get_cashflow(pretty=True)
    self.assertIsInstance(table, pd.DataFrame, "data has wrong type")
    self.assertFalse(table.empty, "data is empty")
    for row in pretty_rows:
        self.assertIn(row, table.index, "Did not find expected row in index")
    # Gap between the first two column dates must be within 20 days of a year
    gap_days = abs((table.columns[0] - table.columns[1]).days)
    self.assertLess(abs(gap_days - days_per_period), 20, "Not returning annual financials")

    # Test property defaults
    via_property = self.ticker.cashflow
    self.assertTrue(table.equals(via_property), "property not defaulting to 'pretty=True'")

    # Test pretty=False
    raw_table = self.ticker.get_cashflow(pretty=False)
    self.assertIsInstance(raw_table, pd.DataFrame, "data has wrong type")
    self.assertFalse(raw_table.empty, "data is empty")
    for row in raw_rows:
        self.assertIn(row, raw_table.index, "Did not find expected row in index")

    # Test to_dict
    as_mapping = self.ticker.get_cashflow(as_dict=True)
    self.assertIsInstance(as_mapping, dict, "data has wrong type")
def test_quarterly_cash_flow(self):
    """Quarterly cash flow: expected rows, roughly quarterly columns, property default, dict form."""
    expected_keys = ["Operating Cash Flow", "Net PPE Purchase And Sale"]
    expected_periods_days = 365//4

    # Test contents of table
    data = self.ticker.get_cashflow(pretty=True, freq="quarterly")
    self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
    self.assertFalse(data.empty, "data is empty")
    for k in expected_keys:
        self.assertIn(k, data.index, "Did not find expected row in index")
    # Gap between the first two column dates must be within 20 days of a quarter
    period = abs((data.columns[0]-data.columns[1]).days)
    self.assertLess(abs(period-expected_periods_days), 20, "Not returning quarterly financials")

    # Test property defaults
    data2 = self.ticker.quarterly_cashflow
    self.assertTrue(data.equals(data2), "property not defaulting to 'pretty=True'")

    # Test pretty=False
    expected_keys = [k.replace(' ', '') for k in expected_keys]
    data = self.ticker.get_cashflow(pretty=False, freq="quarterly")
    self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
    self.assertFalse(data.empty, "data is empty")
    for k in expected_keys:
        self.assertIn(k, data.index, "Did not find expected row in index")

    # Test to_dict - request the quarterly statement, so this step checks the
    # same data as the rest of the test instead of the annual default.
    data = self.ticker.get_cashflow(as_dict=True, freq="quarterly")
    self.assertIsInstance(data, dict, "data has wrong type")
def test_quarterly_cashflow_old_fmt(self):
    """Legacy quarterly cash flow is a non-empty, cached DataFrame with a NetIncome row."""
    old_ticker = self.ticker_old_fmt

    first_read = old_ticker.get_cashflow(legacy=True, freq="quarterly")
    self.assertIsInstance(first_read, pd.DataFrame, "data has wrong type")
    self.assertFalse(first_read.empty, "data is empty")
    self.assertIn("NetIncome", first_read.index, "Did not find expected row in index")

    second_read = old_ticker.get_cashflow(legacy=True, freq="quarterly")
    self.assertIs(first_read, second_read, "data not cached")
def test_income_alt_names(self):
    # Every spelling of the income-statement accessor must return equal data.
    tkr = self.ticker
    accessor_groups = [
        # (reference, first alias, second alias)
        (lambda: tkr.income_stmt,
         lambda: tkr.incomestmt,
         lambda: tkr.financials),
        (lambda: tkr.get_income_stmt(),
         lambda: tkr.get_incomestmt(),
         lambda: tkr.get_financials()),
        (lambda: tkr.quarterly_income_stmt,
         lambda: tkr.quarterly_incomestmt,
         lambda: tkr.quarterly_financials),
        (lambda: tkr.get_income_stmt(freq="quarterly"),
         lambda: tkr.get_incomestmt(freq="quarterly"),
         lambda: tkr.get_financials(freq="quarterly")),
    ]
    for fetch_reference, fetch_alias_a, fetch_alias_b in accessor_groups:
        reference = fetch_reference()
        alias_a = fetch_alias_a()
        self.assertTrue(reference.equals(alias_a))
        alias_b = fetch_alias_b()
        self.assertTrue(reference.equals(alias_b))
def test_balance_sheet_alt_names(self):
    # Both spellings of the balance-sheet accessor must return equal data.
    tkr = self.ticker
    accessor_pairs = [
        (lambda: tkr.balance_sheet, lambda: tkr.balancesheet),
        (lambda: tkr.get_balance_sheet(), lambda: tkr.get_balancesheet()),
        (lambda: tkr.quarterly_balance_sheet, lambda: tkr.quarterly_balancesheet),
        (lambda: tkr.get_balance_sheet(freq="quarterly"),
         lambda: tkr.get_balancesheet(freq="quarterly")),
    ]
    for fetch_reference, fetch_alias in accessor_pairs:
        reference = fetch_reference()
        alias = fetch_alias()
        self.assertTrue(reference.equals(alias))
def test_cash_flow_alt_names(self):
    # Both spellings of the cash-flow accessor must return equal data.
    tkr = self.ticker
    accessor_pairs = [
        (lambda: tkr.cash_flow, lambda: tkr.cashflow),
        (lambda: tkr.get_cash_flow(), lambda: tkr.get_cashflow()),
        (lambda: tkr.quarterly_cash_flow, lambda: tkr.quarterly_cashflow),
        (lambda: tkr.get_cash_flow(freq="quarterly"),
         lambda: tkr.get_cashflow(freq="quarterly")),
    ]
    for fetch_reference, fetch_alias in accessor_pairs:
        reference = fetch_reference()
        alias = fetch_alias()
        self.assertTrue(reference.equals(alias))
def test_sustainability(self):
    # `sustainability` must be a non-empty DataFrame ...
    first = self.ticker.sustainability
    self.assertIsInstance(first, pd.DataFrame, "data has wrong type")
    self.assertFalse(first.empty, "data is empty")

    # ... and a repeat access must return the same object
    second = self.ticker.sustainability
    self.assertIs(first, second, "data not cached")
def test_recommendations(self):
    # `recommendations` must be a non-empty DataFrame ...
    first = self.ticker.recommendations
    self.assertIsInstance(first, pd.DataFrame, "data has wrong type")
    self.assertFalse(first.empty, "data is empty")

    # ... and a repeat access must return the same object
    second = self.ticker.recommendations
    self.assertIs(first, second, "data not cached")
def test_recommendations_summary(self):
    # `recommendations_summary` must be a non-empty DataFrame ...
    first = self.ticker.recommendations_summary
    self.assertIsInstance(first, pd.DataFrame, "data has wrong type")
    self.assertFalse(first.empty, "data is empty")

    # ... and a repeat access must return the same object
    second = self.ticker.recommendations_summary
    self.assertIs(first, second, "data not cached")
def test_analyst_price_target(self):
    # `analyst_price_target` must be a non-empty DataFrame ...
    first = self.ticker.analyst_price_target
    self.assertIsInstance(first, pd.DataFrame, "data has wrong type")
    self.assertFalse(first.empty, "data is empty")

    # ... and a repeat access must return the same object
    second = self.ticker.analyst_price_target
    self.assertIs(first, second, "data not cached")
def test_revenue_forecasts(self):
    # `revenue_forecasts` must be a non-empty DataFrame ...
    first = self.ticker.revenue_forecasts
    self.assertIsInstance(first, pd.DataFrame, "data has wrong type")
    self.assertFalse(first.empty, "data is empty")

    # ... and a repeat access must return the same object
    second = self.ticker.revenue_forecasts
    self.assertIs(first, second, "data not cached")
def test_calendar(self):
    # `calendar` must be a non-empty DataFrame ...
    first = self.ticker.calendar
    self.assertIsInstance(first, pd.DataFrame, "data has wrong type")
    self.assertFalse(first.empty, "data is empty")

    # ... and a repeat access must return the same object
    second = self.ticker.calendar
    self.assertIs(first, second, "data not cached")
def test_isin(self):
    # `isin` must be a string equal to the expected identifier ...
    first = self.ticker.isin
    self.assertIsInstance(first, str, "data has wrong type")
    # The failure message now describes the actual check (a value comparison,
    # not an emptiness check).
    self.assertEqual("ARDEUT116159", first, "unexpected ISIN value")

    # ... and a repeat access must return the same object
    second = self.ticker.isin
    self.assertIs(first, second, "data not cached")
def test_options(self):
    # `options` must be a tuple holding more than one entry.
    expirations = self.ticker.options
    self.assertIsInstance(expirations, tuple, "data has wrong type")
    has_several = len(expirations) > 1
    self.assertTrue(has_several, "data is empty")
def test_shares(self):
    # `shares` must be a non-empty DataFrame.
    share_table = self.ticker.shares
    self.assertIsInstance(share_table, pd.DataFrame, "data has wrong type")
    self.assertFalse(share_table.empty, "data is empty")
def test_shares_full(self):
    # `get_shares_full()` must return a non-empty Series.
    share_series = self.ticker.get_shares_full()
    self.assertIsInstance(share_series, pd.Series, "data has wrong type")
    self.assertFalse(share_series.empty, "data is empty")
def test_bad_freq_value_raises_exception(self):
    # An unknown `freq` value must be rejected with ValueError.
    with self.assertRaises(ValueError):
        self.ticker.get_cashflow(freq="badarg")
class TestTickerInfo(unittest.TestCase):
    # Checks `Ticker.info` and compares `Ticker.fast_info` against it for a
    # fixed list of symbols, sharing one in-memory cached session.
    session = None

    @classmethod
    def setUpClass(cls):
        cls.session = requests_cache.CachedSession(backend='memory')

    @classmethod
    def tearDownClass(cls):
        if cls.session is not None:
            cls.session.close()

    def setUp(self):
        self.symbols = []
        self.symbols += ["ESLT.TA", "BP.L", "GOOGL"]
        self.symbols.append("QCSTIX")  # good for testing, doesn't trade
        self.symbols += ["BTC-USD", "IWO", "VFINX", "^GSPC"]
        self.symbols += ["SOKE.IS", "ADS.DE"]  # detected bugs
        self.tickers = [yf.Ticker(s, session=self.session) for s in self.symbols]

    def tearDown(self):
        # Release the objects setUp actually created (setUp builds `tickers`,
        # there is no `ticker` attribute on this class).
        self.tickers = None

    def test_info(self):
        data = self.tickers[0].info
        self.assertIsInstance(data, dict, "data has wrong type")
        self.assertIn("symbol", data.keys(), "Did not find expected key in info dict")
        self.assertEqual(self.symbols[0], data["symbol"], "Wrong symbol value in info dict")

    def test_fast_info(self):
        # NOTE(review): this sets module-level state and never restores it.
        yf.scrapers.quote.PRUNE_INFO = False

        # Union of fast_info keys over all tickers, in sorted order
        fast_info_keys = set()
        for ticker in self.tickers:
            fast_info_keys.update(set(ticker.fast_info.keys()))
        fast_info_keys = sorted(fast_info_keys)

        # fast_info key -> info key, or list of info keys to compare against
        key_rename_map = {
            "currency": "currency",
            "quote_type": "quoteType",
            "timezone": "exchangeTimezoneName",
            "last_price": ["currentPrice", "regularMarketPrice"],
            "open": ["open", "regularMarketOpen"],
            "day_high": ["dayHigh", "regularMarketDayHigh"],
            "day_low": ["dayLow", "regularMarketDayLow"],
            "previous_close": ["previousClose"],
            "regular_market_previous_close": ["regularMarketPreviousClose"],
            "fifty_day_average": "fiftyDayAverage",
            "two_hundred_day_average": "twoHundredDayAverage",
            "year_change": ["52WeekChange", "fiftyTwoWeekChange"],
            "year_high": "fiftyTwoWeekHigh",
            "year_low": "fiftyTwoWeekLow",
            "last_volume": ["volume", "regularMarketVolume"],
            "ten_day_average_volume": ["averageVolume10days", "averageDailyVolume10Day"],
            "three_month_average_volume": "averageVolume",
            "market_cap": "marketCap",
            "shares": "sharesOutstanding",
        }
        # Register the camelCase spelling of every snake_case key as well
        for k in list(key_rename_map.keys()):
            if '_' in k:
                key_rename_map[yf.utils.snake_case_2_camelCase(k)] = key_rename_map[k]

        # Note: share count items in info[] are bad. Sometimes the float > outstanding!
        # So often fast_info["shares"] does not match.
        # Why isn't fast_info["shares"] wrong? Because using it to calculate market cap always correct.
        bad_keys = {"shares"}

        # Loose tolerance for averages, no idea why don't match info[]. Is info wrong?
        custom_tolerances = {
            "year_change": 1.0,
            "ten_day_average_volume": 1e-1,
            "three_month_average_volume": 5e-1,
            "fifty_day_average": 1e-2,
            "two_hundred_day_average": 1e-2,
        }
        for k in list(custom_tolerances.keys()):
            if '_' in k:
                custom_tolerances[yf.utils.snake_case_2_camelCase(k)] = custom_tolerances[k]

        for k in fast_info_keys:
            if k in bad_keys:
                continue

            info_names = key_rename_map.get(k, k)
            if not isinstance(info_names, list):
                info_names = [info_names]
            rtol = custom_tolerances.get(k, 5e-3)

            for m in info_names:
                for ticker in self.tickers:
                    if m not in ticker.info:
                        continue

                    correct = ticker.info[m]
                    test = ticker.fast_info[k]
                    if k in ["market_cap", "marketCap"] and ticker.fast_info["currency"] in ["GBp", "ILA"]:
                        # Adjust for currency to match Yahoo:
                        test *= 0.01

                    try:
                        if correct is None:
                            self.assertTrue(test is None or (not np.isnan(test)), f"{k}: {test} must be None or real value because correct={correct}")
                        elif isinstance(test, float) or isinstance(correct, int):
                            self.assertTrue(np.isclose(test, correct, rtol=rtol), f"{ticker.ticker} {k}: {test} != {correct}")
                        else:
                            self.assertEqual(test, correct, f"{k}: {test} != {correct}")
                    except Exception:
                        # `except Exception` rather than a bare except, so that
                        # KeyboardInterrupt/SystemExit are never intercepted here.
                        if k in ["regularMarketPreviousClose"] and ticker.ticker in ["ADS.DE"]:
                            # Yahoo is wrong, is returning post-market close not regular
                            continue
                        raise
def suite():
    """Return a TestSuite containing every test of this module's ticker test classes.

    The classes are loaded through the default test loader. Instantiating a
    TestCase with a free-text description (e.g. ``TestTicker('Test ticker')``)
    fails, because that constructor argument must be the name of an existing
    test method.
    """
    test_classes = (
        TestTicker,
        TestTickerEarnings,
        TestTickerHolders,
        TestTickerHistory,
        TestTickerMiscFinancials,
        TestTickerInfo,
    )
    loader = unittest.defaultTestLoader
    combined = unittest.TestSuite()
    for test_class in test_classes:
        combined.addTests(loader.loadTestsFromTestCase(test_class))
    return combined
# Script entry point: when this file is executed directly, run unittest's
# command-line test runner.
if __name__ == '__main__':
unittest.main()

File diff suppressed because it is too large Load Diff

324
yfinance/data.py Normal file
View File

@@ -0,0 +1,324 @@
import functools
from functools import lru_cache
import hashlib
from base64 import b64decode
usePycryptodome = False # slightly faster
# usePycryptodome = True
if usePycryptodome:
from Crypto.Cipher import AES
from Crypto.Util.Padding import unpad
else:
from cryptography.hazmat.primitives import padding
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
import requests as requests
import re
from bs4 import BeautifulSoup
from frozendict import frozendict
try:
import ujson as json
except ImportError:
import json as json
cache_maxsize = 64
def lru_cache_freezeargs(func):
    """
    Make an ``lru_cache``-wrapped callable usable with dict and list arguments.

    Top-level dict arguments are converted to ``frozendict`` and top-level list
    arguments to ``tuple`` (positional and keyword alike) before the call is
    forwarded, so the cache can hash them. All other arguments pass through
    untouched. ``func`` must already expose ``cache_info`` and ``cache_clear``;
    both are re-exported on the returned wrapper.
    """
    def _freeze(value):
        # Swap the two unhashable container types for hashable equivalents
        if isinstance(value, dict):
            return frozendict(value)
        if isinstance(value, list):
            return tuple(value)
        return value

    @functools.wraps(func)
    def wrapped(*args, **kwargs):
        frozen_args = tuple(_freeze(arg) for arg in args)
        frozen_kwargs = {name: _freeze(value) for name, value in kwargs.items()}
        return func(*frozen_args, **frozen_kwargs)

    # Keep the cache inspection/reset helpers reachable through the wrapper
    wrapped.cache_info = func.cache_info
    wrapped.cache_clear = func.cache_clear
    return wrapped
def _extract_extra_keys_from_stores(data):
new_keys = [k for k in data.keys() if k not in ["context", "plugins"]]
new_keys_values = set([data[k] for k in new_keys])
# Maybe multiple keys have same value - keep one of each
new_keys_uniq = []
new_keys_uniq_values = set()
for k in new_keys:
v = data[k]
if not v in new_keys_uniq_values:
new_keys_uniq.append(k)
new_keys_uniq_values.add(v)
return [data[k] for k in new_keys_uniq]
def decrypt_cryptojs_aes_stores(data, keys=None):
    """
    Decrypt the salted, AES-CBC encrypted ``stores`` blob of a page payload.

    Args:
        data (dict): Payload holding the base64 blob under
            ``data['context']['dispatcher']['stores']``. When both ``_cs`` and
            ``_cr`` are present, a password is derived from them via PBKDF2 and
            used exclusively.
        keys (list, optional): Candidate passwords, tried in order when no
            password could be derived from ``_cs``/``_cr``.

    Returns:
        The JSON-decoded plaintext.

    Raises:
        TypeError: ``keys`` is given but is not a list.
        AssertionError: The decoded blob does not start with ``Salted__``.
        Exception: Key derivation failed, or none of the candidate passwords
            decrypted the blob.
    """

    def _EVPKDF(password, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="md5") -> tuple:
        """OpenSSL EVP Key Derivation Function
        Args:
            password (Union[str, bytes, bytearray]): Password to generate key from.
            salt (Union[bytes, bytearray]): Salt to use.
            keySize (int, optional): Output key length in bytes. Defaults to 32.
            ivSize (int, optional): Output Initialization Vector (IV) length in bytes. Defaults to 16.
            iterations (int, optional): Number of iterations to perform. Defaults to 1.
            hashAlgorithm (str, optional): Hash algorithm to use for the KDF. Defaults to 'md5'.
        Returns:
            key, iv: Derived key and Initialization Vector (IV) bytes.
        Taken from: https://gist.github.com/rafiibrahim8/0cd0f8c46896cafef6486cb1a50a16d3
        OpenSSL original code: https://github.com/openssl/openssl/blob/master/crypto/evp/evp_key.c#L78
        """
        assert iterations > 0, "Iterations can not be less than 1."

        if isinstance(password, str):
            password = password.encode("utf-8")

        final_length = keySize + ivSize
        key_iv = b""
        block = None

        # Each round hashes (previous block + password + salt) until enough
        # bytes exist for both the key and the IV.
        while len(key_iv) < final_length:
            hasher = hashlib.new(hashAlgorithm)
            if block:
                hasher.update(block)
            hasher.update(password)
            hasher.update(salt)
            block = hasher.digest()
            for _ in range(1, iterations):
                block = hashlib.new(hashAlgorithm, block).digest()
            key_iv += block

        key, iv = key_iv[:keySize], key_iv[keySize:final_length]
        return key, iv

    def _decrypt(ciphertext, key, iv):
        # AES-CBC decrypt followed by PKCS7 unpadding, via the configured backend
        if usePycryptodome:
            cipher = AES.new(key, AES.MODE_CBC, iv=iv)
            plaintext = cipher.decrypt(ciphertext)
            plaintext = unpad(plaintext, 16, style="pkcs7")
        else:
            cipher = Cipher(algorithms.AES(key), modes.CBC(iv))
            decryptor = cipher.decryptor()
            plaintext = decryptor.update(ciphertext) + decryptor.finalize()
            unpadder = padding.PKCS7(128).unpadder()
            plaintext = unpadder.update(plaintext) + unpadder.finalize()
            plaintext = plaintext.decode("utf-8")
        return plaintext

    encrypted_stores = data['context']['dispatcher']['stores']

    password = None
    if keys is not None:
        if not isinstance(keys, list):
            raise TypeError("'keys' must be list")
        candidate_passwords = keys
    else:
        candidate_passwords = []

    if "_cs" in data and "_cr" in data:
        _cs = data["_cs"]
        _cr = data["_cr"]
        # `_cr` is JSON with a "words" list of signed 32-bit ints; pack them big-endian
        _cr = b"".join(int.to_bytes(i, length=4, byteorder="big", signed=True) for i in json.loads(_cr)["words"])
        password = hashlib.pbkdf2_hmac("sha1", _cs.encode("utf8"), _cr, 1, dklen=32).hex()

    # Layout after base64 decoding: b"Salted__" | 8-byte salt | ciphertext
    encrypted_stores = b64decode(encrypted_stores)
    assert encrypted_stores[0:8] == b"Salted__"
    salt = encrypted_stores[8:16]
    encrypted_stores = encrypted_stores[16:]

    if password is not None:
        try:
            key, iv = _EVPKDF(password, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="md5")
        except Exception as err:
            raise Exception("yfinance failed to decrypt Yahoo data response") from err
        plaintext = _decrypt(encrypted_stores, key, iv)
    else:
        for candidate in candidate_passwords:
            try:
                key, iv = _EVPKDF(candidate, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="md5")
                plaintext = _decrypt(encrypted_stores, key, iv)
            except Exception:
                # This candidate did not work - try the next one. Catching
                # Exception (not everything) keeps Ctrl-C/SystemExit working.
                continue
            break
        else:
            raise Exception("yfinance failed to decrypt Yahoo data response")

    decoded_stores = json.loads(plaintext)
    return decoded_stores
_SCRAPE_URL_ = 'https://finance.yahoo.com/quote'
class TickerData:
    """
    Have one place to retrieve data from Yahoo API in order to ease caching and speed up operations
    """
    # Default headers sent with every request unless the caller supplies its own
    user_agent_headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}

    def __init__(self, ticker: str, session=None):
        """Remember the ticker symbol; use `session` for HTTP, else the `requests` module."""
        self.ticker = ticker
        self._session = session or requests

    def get(self, url, user_agent_headers=None, params=None, proxy=None, timeout=30):
        """Issue a GET through the session and return the response object (never cached here)."""
        proxy = self._get_proxy(proxy)
        response = self._session.get(
            url=url,
            params=params,
            proxies=proxy,
            timeout=timeout,
            headers=user_agent_headers or self.user_agent_headers)
        return response

    # NOTE(review): lru_cache on a method keys on `self`, so cached entries keep
    # their TickerData instance alive for as long as they stay in the cache.
    @lru_cache_freezeargs
    @lru_cache(maxsize=cache_maxsize)
    def cache_get(self, url, user_agent_headers=None, params=None, proxy=None, timeout=30):
        """Same as `get`, but memoised on the full argument set."""
        return self.get(url, user_agent_headers, params, proxy, timeout)

    def _get_proxy(self, proxy):
        """Normalise `proxy` to ``{"https": <url>}``; None is passed through unchanged."""
        # setup proxy in requests format
        if proxy is not None:
            if isinstance(proxy, dict) and "https" in proxy:
                proxy = proxy["https"]
            proxy = {"https": proxy}
        return proxy

    def _get_decryption_keys_from_yahoo_js(self, soup):
        """
        Try to recover the decryption password from the parsed page.

        Two strategies are attempted in order: (1) join the values of the four
        top-level entries following "plugins" in the ``root.App.main`` object;
        (2) scan the page's ``main.*`` script files for four ``t["..."]`` names
        and join the values stored under those names.

        Returns:
            list: A single-element list holding the password, or an empty list
            when neither strategy produced one.
        """
        # Function-scope imports: only the failed-download back-off below needs them
        import random
        import time

        key_count = 4

        script_tag = soup.find("script", string=re.compile("root.App.main"))
        if script_tag is None:
            return []
        main_match = re.search(r"root.App.main\s+=\s+(\{.*\})", script_tag.text)
        if main_match is None:
            return []
        re_data = json.loads(main_match.group(1))
        re_data.pop("context", None)
        key_list = list(re_data.keys())

        def _join_values(names):
            # Concatenate the values stored under `names`; None if any is missing/empty
            collected = {}
            for name in names:
                value = re_data.get(name)
                if not value:
                    return None
                collected[name] = value
            return ''.join(collected.values())

        if re_data.get("plugins"):  # 1) attempt to get last 4 keys after plugins
            sub_keys = key_list[key_list.index("plugins") + 1:]
            if len(sub_keys) == key_count:
                password = _join_values(sub_keys)
                if password is not None:
                    return [password]

        re_keys = []  # 2) attempt scan main.js file approach to get keys
        prefix = "https://s.yimg.com/uc/finance/dd-site/js/main."
        tags = [tag['src'] for tag in soup.find_all('script') if prefix in tag.get('src', '')]
        for t in tags:
            response_js = self.cache_get(t)
            if response_js.status_code != 200:
                # Back off for a random 10-19 seconds before looking at the next script
                time.sleep(random.randrange(10, 20))
                response_js.close()
            else:
                r_data = response_js.content.decode("utf8")
                re_list = [
                    x.group() for x in re.finditer(r"context.dispatcher.stores=JSON.parse((?:.*?\r?\n?)*)toString", r_data)
                ]
                for rl in re_list:
                    re_sublist = [x.group() for x in re.finditer(r"t\[\"((?:.*?\r?\n?)*)\"\]", rl)]
                    if len(re_sublist) == key_count:
                        re_keys = [sl.replace('t["', '').replace('"]', '') for sl in re_sublist]
                        break
                response_js.close()
            if len(re_keys) == key_count:
                break

        if len(re_keys) > 0:
            password = _join_values(re_keys)
            if password is not None:
                return [password]

        return []

    @lru_cache_freezeargs
    @lru_cache(maxsize=cache_maxsize)
    def get_json_data_stores(self, sub_page: str = None, proxy=None) -> dict:
        '''
        get_json_data_stores returns a python dictionary of the data stores in yahoo finance web page.

        Returns an empty dict when the page does not contain the expected
        ``root.App.main`` assignment. Raises Exception when the stores cannot
        be decrypted or extracted.
        '''
        if sub_page:
            ticker_url = "{}/{}/{}".format(_SCRAPE_URL_, self.ticker, sub_page)
        else:
            ticker_url = "{}/{}".format(_SCRAPE_URL_, self.ticker)

        response = self.get(url=ticker_url, proxy=proxy)
        html = response.text

        # The actual json-data for stores is in a javascript assignment in the webpage
        try:
            json_str = html.split('root.App.main =')[1].split(
                '(this)')[0].split(';\n}')[0].strip()
        except IndexError:
            # Fetch failed, probably because Yahoo spam triggered
            return {}

        data = json.loads(json_str)

        try:
            raw_stores = data['context']['dispatcher']['stores']
        except (KeyError, TypeError):
            raw_stores = None

        if isinstance(raw_stores, dict):
            # Maybe Yahoo returned old format, not encrypted: the stores are
            # already a mapping, so there is nothing to decrypt.
            stores = raw_stores
        else:
            # Gather decryption keys:
            soup = BeautifulSoup(response.content, "html.parser")
            keys = self._get_decryption_keys_from_yahoo_js(soup)
            if len(keys) == 0:
                # Backup 1: join the last four distinct extra values of the payload
                keys = []
                try:
                    extra_keys = _extract_extra_keys_from_stores(data)
                    keys = [''.join(extra_keys[-4:])]
                except Exception:
                    pass
                # Backup 2: append the key list published in the project repository
                keys_url = "https://github.com/ranaroussi/yfinance/raw/main/yfinance/scrapers/yahoo-keys.txt"
                response_gh = self.cache_get(keys_url)
                keys += response_gh.text.splitlines()

            # Decrypt!
            stores = decrypt_cryptojs_aes_stores(data, keys)

        if stores is None:
            raise Exception(f"{self.ticker}: Failed to extract data stores from web request")

        # Empty objects become null, and {"raw": x, ...} wrappers collapse to x
        new_data = json.dumps(stores).replace('{}', 'null')
        new_data = re.sub(
            r'{[\'|\"]raw[\'|\"]:(.*?),(.*?)}', r'\1', new_data)

        return json.loads(new_data)

6
yfinance/exceptions.py Normal file
View File

@@ -0,0 +1,6 @@
class YFinanceException(Exception):
    """Exception type defined by this package; ``YFinanceDataException`` derives from it."""
class YFinanceDataException(YFinanceException):
    """A ``YFinanceException`` subtype with no behaviour of its own."""

View File

@@ -29,10 +29,10 @@ from . import Ticker, utils
from . import shared
def download(tickers, start=None, end=None, actions=False, threads=True, ignore_tz=True,
group_by='column', auto_adjust=False, back_adjust=False, keepna=False,
def download(tickers, start=None, end=None, actions=False, threads=True, ignore_tz=None,
group_by='column', auto_adjust=False, back_adjust=False, repair=False, keepna=False,
progress=True, period="max", show_errors=True, interval="1d", prepost=False,
proxy=None, rounding=False, timeout=None, **kwargs):
proxy=None, rounding=False, timeout=10):
"""Download yahoo tickers
:Parameters:
tickers : str, list
@@ -56,6 +56,9 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
Default is False
auto_adjust: bool
Adjust all OHLC automatically? Default is False
repair: bool
Detect currency unit 100x mixups and attempt repair
Default is False
keepna: bool
Keep NaN rows returned by Yahoo?
Default is False
@@ -65,7 +68,7 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
How many threads to use for mass downloading. Default is True
ignore_tz: bool
When combining from different timezones, ignore that part of datetime.
Default is True
Default depends on interval. Intraday = False. Day+ = True.
proxy: str
Optional. Proxy server URL scheme. Default is None
rounding: bool
@@ -77,6 +80,14 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
seconds. (Can also be a fraction of a second e.g. 0.01)
"""
if ignore_tz is None:
# Set default value depending on interval
if interval[1:] in ['m', 'h']:
# Intraday
ignore_tz = False
else:
ignore_tz = True
# create ticker list
tickers = tickers if isinstance(
tickers, (list, set, tuple)) else tickers.replace(',', ' ').split()
@@ -111,7 +122,7 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
_download_one_threaded(ticker, period=period, interval=interval,
start=start, end=end, prepost=prepost,
actions=actions, auto_adjust=auto_adjust,
back_adjust=back_adjust, keepna=keepna,
back_adjust=back_adjust, repair=repair, keepna=keepna,
progress=(progress and i > 0), proxy=proxy,
rounding=rounding, timeout=timeout)
while len(shared._DFS) < len(tickers):
@@ -123,7 +134,8 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
data = _download_one(ticker, period=period, interval=interval,
start=start, end=end, prepost=prepost,
actions=actions, auto_adjust=auto_adjust,
back_adjust=back_adjust, keepna=keepna, proxy=proxy,
back_adjust=back_adjust, repair=repair, keepna=keepna,
proxy=proxy,
rounding=rounding, timeout=timeout)
shared._DFS[ticker.upper()] = data
if progress:
@@ -141,7 +153,7 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
if ignore_tz:
for tkr in shared._DFS.keys():
if (shared._DFS[tkr] is not None) and (shared._DFS[tkr].shape[0]>0):
if (shared._DFS[tkr] is not None) and (shared._DFS[tkr].shape[0] > 0):
shared._DFS[tkr].index = shared._DFS[tkr].index.tz_localize(None)
if len(tickers) == 1:
@@ -191,28 +203,34 @@ def _realign_dfs():
@_multitasking.task
def _download_one_threaded(ticker, start=None, end=None,
auto_adjust=False, back_adjust=False,
auto_adjust=False, back_adjust=False, repair=False,
actions=False, progress=True, period="max",
interval="1d", prepost=False, proxy=None,
keepna=False, rounding=False, timeout=None):
data = _download_one(ticker, start, end, auto_adjust, back_adjust,
actions, period, interval, prepost, proxy, rounding,
keepna, timeout)
shared._DFS[ticker.upper()] = data
keepna=False, rounding=False, timeout=10):
try:
data = _download_one(ticker, start, end, auto_adjust, back_adjust, repair,
actions, period, interval, prepost, proxy, rounding,
keepna, timeout)
except Exception as e:
# glob try/except needed as current thead implementation breaks if exception is raised.
shared._DFS[ticker] = utils.empty_df()
shared._ERRORS[ticker] = repr(e)
else:
shared._DFS[ticker.upper()] = data
if progress:
shared._PROGRESS_BAR.animate()
def _download_one(ticker, start=None, end=None,
auto_adjust=False, back_adjust=False,
auto_adjust=False, back_adjust=False, repair=False,
actions=False, period="max", interval="1d",
prepost=False, proxy=None, rounding=False,
keepna=False, timeout=None):
return Ticker(ticker).history(period=period, interval=interval,
start=start, end=end, prepost=prepost,
actions=actions, auto_adjust=auto_adjust,
back_adjust=back_adjust, proxy=proxy,
rounding=rounding, keepna=keepna, many=True,
timeout=timeout)
keepna=False, timeout=10):
return Ticker(ticker).history(
period=period, interval=interval,
start=start, end=end, prepost=prepost,
actions=actions, auto_adjust=auto_adjust,
back_adjust=back_adjust, repair=repair, proxy=proxy,
rounding=rounding, keepna=keepna, timeout=timeout,
debug=False, raise_errors=False # debug and raise_errors false to not log and raise errors in threads
)

View File

View File

@@ -0,0 +1,118 @@
import pandas as pd
from yfinance import utils
from yfinance.data import TickerData
class Analysis:
    """
    Lazily scraped analyst data taken from the "analysis" page stores.

    The first property access triggers a single scrape; every property returns
    whatever that scrape managed to build (None when nothing could be built).
    """

    def __init__(self, data: "TickerData", proxy=None):
        self._data = data
        self.proxy = proxy

        self._earnings_trend = None
        self._analyst_trend_details = None
        self._analyst_price_target = None
        self._rev_est = None
        self._eps_est = None
        # Set on the first scrape attempt so the page is requested at most once
        self._already_scraped = False

    @property
    def earnings_trend(self) -> pd.DataFrame:
        if self._earnings_trend is None:
            self._scrape(self.proxy)
        return self._earnings_trend

    @property
    def analyst_trend_details(self) -> pd.DataFrame:
        if self._analyst_trend_details is None:
            self._scrape(self.proxy)
        return self._analyst_trend_details

    @property
    def analyst_price_target(self) -> pd.DataFrame:
        if self._analyst_price_target is None:
            self._scrape(self.proxy)
        return self._analyst_price_target

    @property
    def rev_est(self) -> pd.DataFrame:
        if self._rev_est is None:
            self._scrape(self.proxy)
        return self._rev_est

    @property
    def eps_est(self) -> pd.DataFrame:
        if self._eps_est is None:
            self._scrape(self.proxy)
        return self._eps_est

    def _scrape(self, proxy):
        """Fetch the "analysis" stores once and populate every cached attribute."""
        if self._already_scraped:
            return
        self._already_scraped = True

        # Analysis Data/Analyst Forecasts
        analysis_data = self._data.get_json_data_stores("analysis", proxy=proxy)
        try:
            analysis_data = analysis_data['QuoteSummaryStore']
        except KeyError:
            err_msg = "No analysis data found, symbol may be delisted"
            print('- %s: %s' % (self._data.ticker, err_msg))
            return

        earnings_trend = analysis_data.get('earningsTrend')
        if isinstance(earnings_trend, dict):
            try:
                analysis = pd.DataFrame(earnings_trend['trend'])
                analysis['endDate'] = pd.to_datetime(analysis['endDate'])
                analysis.set_index('period', inplace=True)
                analysis.index = analysis.index.str.upper()
                analysis.index.name = 'Period'
                analysis.columns = utils.camel2title(analysis.columns)

                # Expand dict-valued cells into "<column> <key>" columns, then
                # drop the original dict-valued columns
                dict_cols = []
                for idx, row in analysis.iterrows():
                    for colname, colval in row.items():
                        if isinstance(colval, dict):
                            dict_cols.append(colname)
                            for k, v in colval.items():
                                new_colname = colname + ' ' + \
                                    utils.camel2title([k])[0]
                                analysis.loc[idx, new_colname] = v

                self._earnings_trend = analysis[[
                    c for c in analysis.columns if c not in dict_cols]]
            except Exception:
                # Best effort: leave earnings_trend unset if the table cannot be built
                pass

        try:
            self._analyst_trend_details = pd.DataFrame(analysis_data['recommendationTrend']['trend'])
        except Exception:
            self._analyst_trend_details = None

        try:
            self._analyst_price_target = pd.DataFrame(analysis_data['financialData'], index=[0])[
                ['targetLowPrice', 'currentPrice', 'targetMeanPrice', 'targetHighPrice', 'numberOfAnalystOpinions']].T
        except Exception:
            self._analyst_price_target = None

        # The estimates are read from earningsTrend, so their presence is decided
        # by earningsTrend itself (not by whether recommendationTrend parsed).
        trend_entries = earnings_trend.get('trend') if isinstance(earnings_trend, dict) else None
        earnings_estimate = []
        revenue_estimate = []
        if trend_entries:
            for entry in trend_entries:
                try:
                    # Copy before annotating so the scraped store stays unmodified
                    earnings_dict = dict(entry['earningsEstimate'])
                    earnings_dict['period'] = entry['period']
                    earnings_dict['endDate'] = entry['endDate']
                    earnings_estimate.append(earnings_dict)

                    revenue_dict = dict(entry['revenueEstimate'])
                    revenue_dict['period'] = entry['period']
                    revenue_dict['endDate'] = entry['endDate']
                    revenue_estimate.append(revenue_dict)
                except Exception:
                    # Skip entries lacking the expected fields
                    pass

        # With no usable entries both frames come out empty
        self._rev_est = pd.DataFrame(revenue_estimate)
        self._eps_est = pd.DataFrame(earnings_estimate)

View File

@@ -0,0 +1,319 @@
import datetime
import json
import pandas as pd
import numpy as np
from yfinance import utils
from yfinance.data import TickerData
from yfinance.exceptions import YFinanceDataException, YFinanceException
class Fundamentals:
    """
    Lazily scraped earnings and share-count data from the "financials" page
    stores, plus a `Financials` helper exposed through `financials`.
    """

    def __init__(self, data: TickerData, proxy=None):
        self._data = data
        self.proxy = proxy

        # Results, filled on first access of the matching property
        self._earnings = None
        self._shares = None

        # Raw stores and their 'QuoteSummaryStore' part, fetched at most once
        self._financials_data = None
        self._fin_data_quote = None
        self._basics_already_scraped = False

        self._financials = Financials(data)

    @property
    def financials(self) -> "Financials":
        return self._financials

    @property
    def earnings(self) -> dict:
        if self._earnings is None:
            self._scrape_earnings(self.proxy)
        return self._earnings

    @property
    def shares(self) -> pd.DataFrame:
        if self._shares is None:
            self._scrape_shares(self.proxy)
        return self._shares

    def _scrape_basics(self, proxy):
        """Fetch the "financials" stores once and keep the 'QuoteSummaryStore' part."""
        if self._basics_already_scraped:
            return
        self._basics_already_scraped = True

        self._financials_data = self._data.get_json_data_stores('financials', proxy)
        try:
            self._fin_data_quote = self._financials_data['QuoteSummaryStore']
        except KeyError:
            err_msg = "No financials data found, symbol may be delisted"
            print('- %s: %s' % (self._data.ticker, err_msg))
            return None

    def _scrape_earnings(self, proxy):
        """Build the yearly/quarterly earnings tables; both stay empty when data is missing."""
        self._scrape_basics(proxy)

        # earnings
        self._earnings = {"yearly": pd.DataFrame(), "quarterly": pd.DataFrame()}

        quote = self._fin_data_quote
        if quote is None:
            return
        if not isinstance(quote.get('earnings'), dict):
            return

        try:
            earnings_section = quote['earnings']
            chart = earnings_section['financialsChart']
            chart['financialCurrency'] = earnings_section.get('financialCurrency', 'USD')
            self._earnings['financialCurrency'] = chart['financialCurrency']

            # Same reshaping for both periods; only the index label differs
            for period, index_label in (('yearly', 'Year'), ('quarterly', 'Quarter')):
                frame = pd.DataFrame(chart[period]).set_index('date')
                frame.columns = utils.camel2title(frame.columns)
                frame.index.name = index_label
                self._earnings[period] = frame
        except Exception:
            # Best effort: keep whatever was built before the failure
            pass

    def _scrape_shares(self, proxy):
        """Build the per-year basic share count table; leaves `_shares` unset on any failure."""
        self._scrape_basics(proxy)

        # shares outstanding
        try:
            yearly_entries = self._financials_data['QuoteTimeSeriesStore']['timeSeries']['annualBasicAverageShares']
            # keep only years with non None data
            share_table = pd.DataFrame([entry for entry in yearly_entries if entry])

            # The year is the first four characters of 'asOfDate'
            share_table['Year'] = share_table['asOfDate'].agg(lambda x: int(x[:4]))
            share_table.set_index('Year', inplace=True)
            share_table.drop(
                columns=['dataId', 'asOfDate', 'periodType', 'currencyCode'],
                inplace=True)
            share_table.rename(columns={'reportedValue': "BasicShares"}, inplace=True)
            self._shares = share_table
        except Exception:
            pass
class Financials:
def __init__(self, data: TickerData):
    # Data source used for every request made by this object
    self._data = data

    # Statement tables, stored by the get_*_time_series methods under the
    # requested frequency
    self._cash_flow_time_series = {}
    self._balance_sheet_time_series = {}
    self._income_time_series = {}

    # NOTE(review): these three are initialised here but not used by the
    # methods visible in this part of the file
    self._cash_flow_scraped = {}
    self._balance_sheet_scraped = {}
    self._income_scraped = {}
def get_income_time_series(self, freq="yearly", proxy=None) -> pd.DataFrame:
    """
    Return the income statement table for `freq`, fetching it on first use.

    The result is stored per `freq`; later calls with the same `freq` return
    the stored table without fetching again. `proxy` is forwarded to the fetch.
    """
    cache = self._income_time_series
    if freq not in cache:
        cache[freq] = self._fetch_time_series("income", freq, proxy=proxy)
    return cache[freq]
def get_balance_sheet_time_series(self, freq="yearly", proxy=None) -> pd.DataFrame:
    """
    Return the balance sheet table for `freq`, fetching it on first use.

    The result is stored per `freq`; later calls with the same `freq` return
    the stored table without fetching again. `proxy` is forwarded to the fetch.
    """
    cache = self._balance_sheet_time_series
    if freq not in cache:
        cache[freq] = self._fetch_time_series("balance-sheet", freq, proxy=proxy)
    return cache[freq]
def get_cash_flow_time_series(self, freq="yearly", proxy=None) -> pd.DataFrame:
    """
    Return the cash flow table for `freq`, fetching it on first use.

    The result is stored per `freq`; later calls with the same `freq` return
    the stored table without fetching again. `proxy` is forwarded to the fetch.
    """
    cache = self._cash_flow_time_series
    if freq not in cache:
        cache[freq] = self._fetch_time_series("cash-flow", freq, proxy=proxy)
    return cache[freq]
def _fetch_time_series(self, name, timescale, proxy=None):
# Fetching time series preferred over scraping 'QuoteSummaryStore',
# because it matches what Yahoo shows. But for some tickers returns nothing,
# despite 'QuoteSummaryStore' containing valid data.
allowed_names = ["income", "balance-sheet", "cash-flow"]
allowed_timescales = ["yearly", "quarterly"]
if name not in allowed_names:
raise ValueError("Illegal argument: name must be one of: {}".format(allowed_names))
if timescale not in allowed_timescales:
raise ValueError("Illegal argument: timescale must be one of: {}".format(allowed_names))
try:
statement = self._create_financials_table(name, timescale, proxy)
if statement is not None:
return statement
except YFinanceException as e:
print(f"- {self._data.ticker}: Failed to create {name} financials table for reason: {repr(e)}")
return pd.DataFrame()
def _create_financials_table(self, name, timescale, proxy):
if name == "income":
# Yahoo stores the 'income' table internally under 'financials' key
name = "financials"
keys = self._get_datastore_keys(name, proxy)
try:
return self.get_financials_time_series(timescale, keys, proxy)
except Exception as e:
pass
def _get_datastore_keys(self, sub_page, proxy) -> list:
data_stores = self._data.get_json_data_stores(sub_page, proxy)
# Step 1: get the keys:
def _finditem1(key, obj):
values = []
if isinstance(obj, dict):
if key in obj.keys():
values.append(obj[key])
for k, v in obj.items():
values += _finditem1(key, v)
elif isinstance(obj, list):
for v in obj:
values += _finditem1(key, v)
return values
try:
keys = _finditem1("key", data_stores['FinancialTemplateStore'])
except KeyError as e:
raise YFinanceDataException("Parsing FinancialTemplateStore failed, reason: {}".format(repr(e)))
if not keys:
raise YFinanceDataException("No keys in FinancialTemplateStore")
return keys
def get_financials_time_series(self, timescale, keys: list, proxy=None) -> pd.DataFrame:
timescale_translation = {"yearly": "annual", "quarterly": "quarterly"}
timescale = timescale_translation[timescale]
# Step 2: construct url:
ts_url_base = \
"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{0}?symbol={0}" \
.format(self._data.ticker)
url = ts_url_base + "&type=" + ",".join([timescale + k for k in keys])
# Yahoo returns maximum 4 years or 5 quarters, regardless of start_dt:
start_dt = datetime.datetime(2016, 12, 31)
end = pd.Timestamp.utcnow().ceil("D")
url += "&period1={}&period2={}".format(int(start_dt.timestamp()), int(end.timestamp()))
# Step 3: fetch and reshape data
json_str = self._data.cache_get(url=url, proxy=proxy).text
json_data = json.loads(json_str)
data_raw = json_data["timeseries"]["result"]
# data_raw = [v for v in data_raw if len(v) > 1] # Discard keys with no data
for d in data_raw:
del d["meta"]
# Now reshape data into a table:
# Step 1: get columns and index:
timestamps = set()
data_unpacked = {}
for x in data_raw:
for k in x.keys():
if k == "timestamp":
timestamps.update(x[k])
else:
data_unpacked[k] = x[k]
timestamps = sorted(list(timestamps))
dates = pd.to_datetime(timestamps, unit="s")
df = pd.DataFrame(columns=dates, index=list(data_unpacked.keys()))
for k, v in data_unpacked.items():
if df is None:
df = pd.DataFrame(columns=dates, index=[k])
df.loc[k] = {pd.Timestamp(x["asOfDate"]): x["reportedValue"]["raw"] for x in v}
df.index = df.index.str.replace("^" + timescale, "", regex=True)
# Reorder table to match order on Yahoo website
df = df.reindex([k for k in keys if k in df.index])
df = df[sorted(df.columns, reverse=True)]
return df
def get_income_scrape(self, freq="yearly", proxy=None) -> pd.DataFrame:
res = self._income_scraped
if freq not in res:
res[freq] = self._scrape("income", freq, proxy=None)
return res[freq]
def get_balance_sheet_scrape(self, freq="yearly", proxy=None) -> pd.DataFrame:
res = self._balance_sheet_scraped
if freq not in res:
res[freq] = self._scrape("balance-sheet", freq, proxy=None)
return res[freq]
def get_cash_flow_scrape(self, freq="yearly", proxy=None) -> pd.DataFrame:
res = self._cash_flow_scraped
if freq not in res:
res[freq] = self._scrape("cash-flow", freq, proxy=None)
return res[freq]
def _scrape(self, name, timescale, proxy=None):
# Backup in case _fetch_time_series() fails to return data
allowed_names = ["income", "balance-sheet", "cash-flow"]
allowed_timescales = ["yearly", "quarterly"]
if name not in allowed_names:
raise ValueError("Illegal argument: name must be one of: {}".format(allowed_names))
if timescale not in allowed_timescales:
raise ValueError("Illegal argument: timescale must be one of: {}".format(allowed_names))
try:
statement = self._create_financials_table_old(name, timescale, proxy)
if statement is not None:
return statement
except YFinanceException as e:
print(f"- {self._data.ticker}: Failed to create financials table for {name} reason: {repr(e)}")
return pd.DataFrame()
def _create_financials_table_old(self, name, timescale, proxy):
data_stores = self._data.get_json_data_stores("financials", proxy)
# Fetch raw data
if not "QuoteSummaryStore" in data_stores:
raise YFinanceDataException(f"Yahoo not returning legacy financials data")
data = data_stores["QuoteSummaryStore"]
if name == "cash-flow":
key1 = "cashflowStatement"
key2 = "cashflowStatements"
elif name == "balance-sheet":
key1 = "balanceSheet"
key2 = "balanceSheetStatements"
else:
key1 = "incomeStatement"
key2 = "incomeStatementHistory"
key1 += "History"
if timescale == "quarterly":
key1 += "Quarterly"
if key1 not in data or data[key1] is None or key2 not in data[key1]:
raise YFinanceDataException(f"Yahoo not returning legacy {name} financials data")
data = data[key1][key2]
# Tabulate
df = pd.DataFrame(data)
if len(df) == 0:
raise YFinanceDataException(f"Yahoo not returning legacy {name} financials data")
df = df.drop(columns=['maxAge'])
for col in df.columns:
df[col] = df[col].replace('-', np.nan)
df.set_index('endDate', inplace=True)
try:
df.index = pd.to_datetime(df.index, unit='s')
except ValueError:
df.index = pd.to_datetime(df.index)
df = df.T
df.columns.name = ''
df.index.name = 'Breakdown'
# rename incorrect yahoo key
df.rename(index={'treasuryStock': 'gainsLossesNotAffectingRetainedEarnings'}, inplace=True)
# Upper-case first letter, leave rest unchanged:
s0 = df.index[0]
df.index = [s[0].upper()+s[1:] for s in df.index]
return df

View File

@@ -0,0 +1,66 @@
import pandas as pd
from yfinance.data import TickerData
class Holders:
    """Scrape and cache the holder tables from a ticker's Yahoo 'holders' page.

    The page's HTML tables are taken positionally: first is stored as
    ``major``, second as ``institutional``, third as ``mutualfund``. A table
    that is absent stays ``None``.
    """
    _SCRAPE_URL_ = 'https://finance.yahoo.com/quote'

    def __init__(self, data: "TickerData", proxy=None):
        self._data = data
        self.proxy = proxy
        self._major = None
        self._institutional = None
        self._mutualfund = None

    @property
    def major(self) -> pd.DataFrame:
        """First table of the holders page, scraped on first access."""
        if self._major is None:
            self._scrape(self.proxy)
        return self._major

    @property
    def institutional(self) -> pd.DataFrame:
        """Second table of the holders page, scraped on first access."""
        if self._institutional is None:
            self._scrape(self.proxy)
        return self._institutional

    @property
    def mutualfund(self) -> pd.DataFrame:
        """Third table of the holders page, scraped on first access."""
        if self._mutualfund is None:
            self._scrape(self.proxy)
        return self._mutualfund

    @staticmethod
    def _format_holders_table(table):
        """Convert 'Date Reported' to datetimes and '% Out' to fractions, in place."""
        if 'Date Reported' in table:
            table['Date Reported'] = pd.to_datetime(table['Date Reported'])
        if '% Out' in table:
            table['% Out'] = table['% Out'].str.replace(
                '%', '', regex=False).astype(float) / 100
        return table

    def _scrape(self, proxy):
        """Download the holders page and store whichever tables it contains.

        Any failure while fetching or parsing is treated as "no tables", so
        the cached attributes are left untouched.
        """
        # Local import keeps this block self-contained; read_html expects a
        # file-like object (literal HTML strings are deprecated in pandas 2.1+).
        from io import StringIO

        ticker_url = "{}/{}".format(self._SCRAPE_URL_, self._data.ticker)
        try:
            resp = self._data.cache_get(ticker_url + '/holders', proxy)
            holders = pd.read_html(StringIO(resp.text))
        except Exception:
            holders = []

        if len(holders) >= 1:
            self._major = holders[0]
        # Only freshly scraped tables are formatted, so a table cached by an
        # earlier scrape is never converted a second time.
        if len(holders) >= 2:
            self._institutional = self._format_holders_table(holders[1])
        if len(holders) >= 3:
            self._mutualfund = self._format_holders_table(holders[2])

296
yfinance/scrapers/quote.py Normal file
View File

@@ -0,0 +1,296 @@
import datetime
import json
import pandas as pd
from yfinance import utils
from yfinance.data import TickerData
# Keys that are dropped from the scraped info dict, grouped by the kind of
# data they carried. Each group gets its own how-to-migrate message.
info_retired_keys_price = set().union(
    # current-session quote fields
    {"currentPrice", "dayHigh", "dayLow", "open", "previousClose", "volume", "volume24Hr"},
    # "regularMarket*" variants of the same fields
    {
        "regularMarketDayHigh", "regularMarketDayLow", "regularMarketOpen",
        "regularMarketPreviousClose", "regularMarketPrice", "regularMarketVolume",
    },
    # longer-window price statistics
    {
        "fiftyTwoWeekLow", "fiftyTwoWeekHigh", "fiftyTwoWeekChange", "52WeekChange",
        "fiftyDayAverage", "twoHundredDayAverage",
    },
    # volume averages
    {"averageDailyVolume10Day", "averageVolume10days", "averageVolume"},
)
info_retired_keys_exchange = {
    "currency",
    "exchange",
    "exchangeTimezoneName",
    "exchangeTimezoneShortName",
    "quoteType",
}
info_retired_keys_marketCap = {"marketCap"}
info_retired_keys_symbol = {"symbol"}
info_retired_keys = set().union(
    info_retired_keys_price,
    info_retired_keys_exchange,
    info_retired_keys_marketCap,
    info_retired_keys_symbol,
)

# When True the retired keys are removed from info; switch to False to keep them.
PRUNE_INFO = True
from collections.abc import MutableMapping
class InfoDictWrapper(MutableMapping):
    """Mapping facade over the info dict.

    Lookups of retired keys print a message explaining where the value can
    be obtained instead and yield None; everything else is delegated to the
    wrapped dict."""

    def __init__(self, info):
        self.info = info

    def _keytransform(self, k):
        # Identity hook applied to keys before they reach the wrapped dict.
        return k

    # --- read access -----------------------------------------------------

    def __getitem__(self, k):
        # Checked in order; the first group containing the key wins.
        notices = (
            (info_retired_keys_price,
             f"Price data removed from info (key='{k}'). Use Ticker.fast_info or history() instead"),
            (info_retired_keys_exchange,
             f"Exchange data removed from info (key='{k}'). Use Ticker.fast_info or Ticker.get_history_metadata() instead"),
            (info_retired_keys_marketCap,
             f"Market cap removed from info (key='{k}'). Use Ticker.fast_info instead"),
            (info_retired_keys_symbol,
             f"Symbol removed from info (key='{k}'). You know this already"),
        )
        for retired_group, notice in notices:
            if k in retired_group:
                print(notice)
                return None
        return self.info[self._keytransform(k)]

    def __contains__(self, k):
        present = self.info.keys()
        return k in present

    def __iter__(self):
        return iter(self.info)

    def __len__(self):
        return len(self.info)

    def keys(self):
        return self.info.keys()

    # --- write access ----------------------------------------------------

    def __setitem__(self, k, value):
        target = self._keytransform(k)
        self.info[target] = value

    def __delitem__(self, k):
        target = self._keytransform(k)
        del self.info[target]

    # --- presentation ----------------------------------------------------

    def __str__(self):
        return str(self.info)

    def __repr__(self):
        return repr(self.info)
class Quote:
    """Scrape and cache summary data for one ticker.

    Everything comes from the 'QuoteSummaryStore' of the ticker's page data
    (fetched once, on first property access): ``info``, ``sustainability``,
    ``recommendations`` and ``calendar``. ``info`` is additionally completed
    with values from the fundamentals time-series endpoint.
    """

    def __init__(self, data: "TickerData", proxy=None):
        self._data = data
        self.proxy = proxy

        self._info = None
        self._retired_info = None
        self._sustainability = None
        self._recommendations = None
        self._calendar = None

        # Guards so that each network scrape happens at most once.
        self._already_scraped = False
        self._already_scraped_complementary = False

    @property
    def info(self) -> dict:
        if self._info is None:
            self._scrape(self.proxy)
            self._scrape_complementary(self.proxy)
        return self._info

    @property
    def sustainability(self) -> pd.DataFrame:
        if self._sustainability is None:
            self._scrape(self.proxy)
        return self._sustainability

    @property
    def recommendations(self) -> pd.DataFrame:
        if self._recommendations is None:
            self._scrape(self.proxy)
        return self._recommendations

    @property
    def calendar(self) -> pd.DataFrame:
        if self._calendar is None:
            self._scrape(self.proxy)
        return self._calendar

    def _scrape(self, proxy):
        """Populate info, sustainability, calendar and recommendations.

        Runs once per instance. Each section is parsed best-effort: a failure
        in one section leaves that attribute as it was and does not stop the
        others. If 'QuoteSummaryStore' is missing, a message is printed and
        nothing is populated.
        """
        if self._already_scraped:
            return
        self._already_scraped = True

        # get info and sustainability
        json_data = self._data.get_json_data_stores(proxy=proxy)
        try:
            quote_summary_store = json_data['QuoteSummaryStore']
        except KeyError:
            err_msg = "No summary info found, symbol may be delisted"
            print('- %s: %s' % (self._data.ticker, err_msg))
            return None

        # sustainability
        d = {}
        try:
            if isinstance(quote_summary_store.get('esgScores'), dict):
                # Keep only scalar entries of esgScores.
                for item in quote_summary_store['esgScores']:
                    if not isinstance(quote_summary_store['esgScores'][item], (dict, list)):
                        d[item] = quote_summary_store['esgScores'][item]

                s = pd.DataFrame(index=[0], data=d)[-1:].T
                s.columns = ['Value']
                # Index name encodes the rating period as "<year>-<month>".
                s.index.name = '%.f-%.f' % (
                    s[s.index == 'ratingYear']['Value'].values[0],
                    s[s.index == 'ratingMonth']['Value'].values[0])

                self._sustainability = s[~s.index.isin(
                    ['maxAge', 'ratingYear', 'ratingMonth'])]
        except Exception:
            pass

        self._info = {}
        try:
            items = ['summaryProfile', 'financialData', 'quoteType',
                     'defaultKeyStatistics', 'assetProfile', 'summaryDetail']
            for item in items:
                if isinstance(quote_summary_store.get(item), dict):
                    self._info.update(quote_summary_store[item])
        except Exception:
            pass

        # For ETFs, provide this valuable data: the top holdings of the ETF
        try:
            if 'topHoldings' in quote_summary_store:
                self._info.update(quote_summary_store['topHoldings'])
        except Exception:
            pass

        try:
            if not isinstance(quote_summary_store.get('summaryDetail'), dict):
                # For some reason summaryDetail did not give any results. The price dict
                # usually has most of the same info
                self._info.update(quote_summary_store.get('price', {}))
        except Exception:
            pass

        try:
            self._info['regularMarketPrice'] = quote_summary_store.get('price', {}).get(
                'regularMarketPrice', self._info.get('regularMarketOpen', None))
        except Exception:
            pass

        try:
            self._info['preMarketPrice'] = quote_summary_store.get('price', {}).get(
                'preMarketPrice', self._info.get('preMarketPrice', None))
        except Exception:
            pass

        self._info['logo_url'] = ""
        try:
            if 'website' not in self._info:
                # No website: guess a domain from the first word of the short name.
                self._info['logo_url'] = 'https://logo.clearbit.com/%s.com' % \
                    self._info['shortName'].split(' ')[0].split(',')[0]
            else:
                domain = self._info['website'].split(
                    '://')[1].split('/')[0].replace('www.', '')
                self._info['logo_url'] = 'https://logo.clearbit.com/%s' % domain
        except Exception:
            pass

        # Delete redundant info[] keys, because values can be accessed faster
        # elsewhere - e.g. price keys. Hope is that this reduces Yahoo spam effect.
        # But record the dropped keys, because in rare cases they are needed.
        self._retired_info = {}
        for k in info_retired_keys:
            if k in self._info:
                self._retired_info[k] = self._info[k]
                if PRUNE_INFO:
                    del self._info[k]
        if PRUNE_INFO:
            # InfoDictWrapper will explain how to access above data elsewhere
            self._info = InfoDictWrapper(self._info)

        # events
        try:
            cal = pd.DataFrame(quote_summary_store['calendarEvents']['earnings'])
            cal['earningsDate'] = pd.to_datetime(
                cal['earningsDate'], unit='s')
            self._calendar = cal.T
            self._calendar.index = utils.camel2title(self._calendar.index)
            self._calendar.columns = ['Value']
        except Exception:
            pass

        # analyst recommendations
        try:
            rec = pd.DataFrame(
                quote_summary_store['upgradeDowngradeHistory']['history'])
            rec['earningsDate'] = pd.to_datetime(
                rec['epochGradeDate'], unit='s')
            rec.set_index('earningsDate', inplace=True)
            rec.index.name = 'Date'
            rec.columns = utils.camel2title(rec.columns)
            self._recommendations = rec[[
                'Firm', 'To Grade', 'From Grade', 'Action']].sort_index()
        except Exception:
            pass

    def _scrape_complementary(self, proxy):
        """Add complementary key statistics to ``info`` (runs once per instance).

        Each requested key is set to the most recent raw value returned by
        the fundamentals time-series endpoint, or to None when the response
        does not contain a usable value for it.
        """
        if self._already_scraped_complementary:
            return
        self._already_scraped_complementary = True

        self._scrape(proxy)
        if self._info is None:
            return

        # Complementary key-statistics. For now just want 'trailing PEG ratio'.
        # Scraping the whole key-statistics page is very expensive for one
        # value; for just one/few variables it is faster to query directly.
        keys = ("trailingPegRatio",)
        url = "https://query1.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{}?symbol={}".format(
            self._data.ticker, self._data.ticker)
        for k in keys:
            url += "&type=" + k
        # Request 6 months of data
        start = pd.Timestamp.now("UTC").floor("D") - datetime.timedelta(days=365 // 2)
        start = int(start.timestamp())
        end = pd.Timestamp.now("UTC").ceil("D")
        end = int(end.timestamp())
        url += f"&period1={start}&period2={end}"

        json_str = self._data.cache_get(url=url, proxy=proxy).text
        json_data = json.loads(json_str)
        try:
            results = list(json_data["timeseries"]["result"])
        except Exception:
            results = []

        # Resolve every key explicitly instead of relying on the URL-building
        # loop's leftover variable, so adding keys above keeps working.
        for k in keys:
            v = None
            for entry in results:
                try:
                    if k in entry:
                        # Select most recent (last) raw value in list:
                        v = entry[k][-1]["reportedValue"]["raw"]
                        break
                except Exception:
                    # Malformed entry: treat as "Yahoo lacks the data".
                    v = None
            self._info[k] = v

View File

@@ -0,0 +1,8 @@
daf93e37cbf219cd4c1f3f74ec4551265ec5565b99e8c9322dccd6872941cf13c818cbb88cba6f530e643b4e2329b17ec7161f4502ce6a02bb0dbbe5fc0d0474
ad4d90b3c9f2e1d156ef98eadfa0ff93e4042f6960e54aa2a13f06f528e6b50ba4265a26a1fd5b9cd3db0d268a9c34e1d080592424309429a58bce4adc893c87
e9a8ab8e5620b712ebc2fb4f33d5c8b9c80c0d07e8c371911c785cf674789f1747d76a909510158a7b7419e86857f2d7abbd777813ff64840e4cbc514d12bcae
6ae2523aeafa283dad746556540145bf603f44edbf37ad404d3766a8420bb5eb1d3738f52a227b88283cca9cae44060d5f0bba84b6a495082589f5fe7acbdc9e
3365117c2a368ffa5df7313a4a84988f73926a86358e8eea9497c5ff799ce27d104b68e5f2fbffa6f8f92c1fef41765a7066fa6bcf050810a9c4c7872fd3ebf0
15d8f57919857d5a5358d2082c7ef0f1129cfacd2a6480333dcfb954b7bb67d820abefebfdb0eaa6ef18a1c57f617b67d7e7b0ec040403b889630ae5db5a4dbb
db9630d707a7d0953ac795cd8db1ca9ca6c9d8239197cdfda24b4e0ec9c37eaec4db82dab68b8f606ab7b5b4af3e65dab50606f8cf508269ec927e6ee605fb78
3c895fb5ddcc37d20d3073ed74ee3efad59bcb147c8e80fd279f83701b74b092d503dcd399604c6d8be8f3013429d3c2c76ed5b31b80c9df92d5eab6d3339fce

View File

@@ -21,21 +21,18 @@
from __future__ import print_function
# import time as _time
import datetime as _datetime
import requests as _requests
import pandas as _pd
# import numpy as _np
# import json as _json
# import re as _re
from collections import namedtuple as _namedtuple
from . import utils
from .base import TickerBase
class Ticker(TickerBase):
def __init__(self, ticker, session=None):
super(Ticker, self).__init__(ticker, session=session)
self._expirations = {}
def __repr__(self):
return 'yfinance.Ticker object <%s>' % self.ticker
@@ -48,17 +45,7 @@ class Ticker(TickerBase):
url = "{}/v7/finance/options/{}?date={}".format(
self._base_url, self.ticker, date)
# setup proxy in requests format
if proxy is not None:
if isinstance(proxy, dict) and "https" in proxy:
proxy = proxy["https"]
proxy = {"https": proxy}
r = _requests.get(
url=url,
proxies=proxy,
headers=utils.user_agent_headers
).json()
r = self._data.get(url=url, proxy=proxy).json()
if len(r.get('optionChain', {}).get('result', [])) > 0:
for exp in r['optionChain']['result'][0]['expirationDates']:
self._expirations[_datetime.datetime.utcfromtimestamp(
@@ -115,39 +102,43 @@ class Ticker(TickerBase):
return self.get_isin()
@property
def major_holders(self):
def major_holders(self) -> _pd.DataFrame:
return self.get_major_holders()
@property
def institutional_holders(self):
def institutional_holders(self) -> _pd.DataFrame:
return self.get_institutional_holders()
@property
def mutualfund_holders(self):
def mutualfund_holders(self) -> _pd.DataFrame:
return self.get_mutualfund_holders()
@property
def dividends(self):
def dividends(self) -> _pd.Series:
return self.get_dividends()
@property
def splits(self):
def capital_gains(self):
return self.get_capital_gains()
@property
def splits(self) -> _pd.Series:
return self.get_splits()
@property
def actions(self):
def actions(self) -> _pd.DataFrame:
return self.get_actions()
@property
def shares(self):
def shares(self) -> _pd.DataFrame :
return self.get_shares()
@property
def info(self):
def info(self) -> dict:
return self.get_info()
@property
def calendar(self):
def calendar(self) -> _pd.DataFrame:
return self.get_calendar()
@property
@@ -155,51 +146,87 @@ class Ticker(TickerBase):
return self.get_recommendations()
@property
def earnings(self):
def earnings(self) -> _pd.DataFrame:
return self.get_earnings()
@property
def quarterly_earnings(self):
def quarterly_earnings(self) -> _pd.DataFrame:
return self.get_earnings(freq='quarterly')
@property
def financials(self):
return self.get_financials()
def income_stmt(self) -> _pd.DataFrame:
return self.get_income_stmt(pretty=True)
@property
def quarterly_financials(self):
return self.get_financials(freq='quarterly')
def quarterly_income_stmt(self) -> _pd.DataFrame:
return self.get_income_stmt(pretty=True, freq='quarterly')
@property
def balance_sheet(self):
return self.get_balancesheet()
def incomestmt(self) -> _pd.DataFrame:
return self.income_stmt
@property
def quarterly_balance_sheet(self):
return self.get_balancesheet(freq='quarterly')
def quarterly_incomestmt(self) -> _pd.DataFrame:
return self.quarterly_income_stmt
@property
def balancesheet(self):
return self.get_balancesheet()
def financials(self) -> _pd.DataFrame:
return self.income_stmt
@property
def quarterly_balancesheet(self):
return self.get_balancesheet(freq='quarterly')
def quarterly_financials(self) -> _pd.DataFrame:
return self.quarterly_income_stmt
@property
def cashflow(self):
return self.get_cashflow()
def balance_sheet(self) -> _pd.DataFrame:
return self.get_balance_sheet(pretty=True)
@property
def quarterly_cashflow(self):
return self.get_cashflow(freq='quarterly')
def quarterly_balance_sheet(self) -> _pd.DataFrame:
return self.get_balance_sheet(pretty=True, freq='quarterly')
@property
def sustainability(self):
def balancesheet(self) -> _pd.DataFrame:
return self.balance_sheet
@property
def quarterly_balancesheet(self) -> _pd.DataFrame:
return self.quarterly_balance_sheet
@property
def cash_flow(self) -> _pd.DataFrame:
return self.get_cash_flow(pretty=True, freq="yearly")
@property
def quarterly_cash_flow(self) -> _pd.DataFrame:
return self.get_cash_flow(pretty=True, freq='quarterly')
@property
def cashflow(self) -> _pd.DataFrame:
return self.cash_flow
@property
def quarterly_cashflow(self) -> _pd.DataFrame:
return self.quarterly_cash_flow
@property
def recommendations_summary(self):
return self.get_recommendations_summary()
@property
def analyst_price_target(self) -> _pd.DataFrame:
return self.get_analyst_price_target()
@property
def revenue_forecasts(self) -> _pd.DataFrame:
return self.get_rev_forecast()
@property
def sustainability(self) -> _pd.DataFrame:
return self.get_sustainability()
@property
def options(self):
def options(self) -> tuple:
if not self._expirations:
self._download_options()
return tuple(self._expirations.keys())
@@ -209,13 +236,17 @@ class Ticker(TickerBase):
return self.get_news()
@property
def analysis(self):
return self.get_analysis()
def earnings_trend(self) -> _pd.DataFrame:
return self.get_earnings_trend()
@property
def earnings_history(self):
return self.get_earnings_history()
@property
def earnings_dates(self):
def earnings_dates(self) -> _pd.DataFrame:
return self.get_earnings_dates()
@property
def earnings_forecasts(self) -> _pd.DataFrame:
return self.get_earnings_forecast()
@property
def history_metadata(self) -> dict:
return self.get_history_metadata()

View File

@@ -25,7 +25,7 @@ from . import Ticker, multi
# from collections import namedtuple as _namedtuple
class Tickers():
class Tickers:
def __repr__(self):
return 'yfinance.Tickers object <%s>' % ",".join(self.symbols)
@@ -34,39 +34,39 @@ class Tickers():
tickers = tickers if isinstance(
tickers, list) else tickers.replace(',', ' ').split()
self.symbols = [ticker.upper() for ticker in tickers]
ticker_objects = {}
self.tickers = {ticker:Ticker(ticker, session=session) for ticker in self.symbols}
for ticker in self.symbols:
ticker_objects[ticker] = Ticker(ticker, session=session)
self.tickers = ticker_objects
# self.tickers = _namedtuple(
# "Tickers", ticker_objects.keys(), rename=True
# )(*ticker_objects.values())
def history(self, period="1mo", interval="1d",
start=None, end=None, prepost=False,
actions=True, auto_adjust=True, proxy=None,
actions=True, auto_adjust=True, repair=False,
proxy=None,
threads=True, group_by='column', progress=True,
timeout=None, **kwargs):
timeout=10, **kwargs):
return self.download(
period, interval,
start, end, prepost,
actions, auto_adjust, proxy,
actions, auto_adjust, repair,
proxy,
threads, group_by, progress,
timeout, **kwargs)
def download(self, period="1mo", interval="1d",
start=None, end=None, prepost=False,
actions=True, auto_adjust=True, proxy=None,
actions=True, auto_adjust=True, repair=False,
proxy=None,
threads=True, group_by='column', progress=True,
timeout=None, **kwargs):
timeout=10, **kwargs):
data = multi.download(self.symbols,
start=start, end=end,
actions=actions,
auto_adjust=auto_adjust,
repair=repair,
period=period,
interval=interval,
prepost=prepost,

View File

@@ -22,6 +22,9 @@
from __future__ import print_function
import datetime as _datetime
import dateutil as _dateutil
from typing import Dict, Union, List, Optional
import pytz as _tz
import requests as _requests
import re as _re
@@ -30,23 +33,40 @@ import numpy as _np
import sys as _sys
import os as _os
import appdirs as _ad
import sqlite3 as _sqlite3
import atexit as _atexit
from threading import Lock
from pytz import UnknownTimeZoneError
try:
import ujson as _json
except ImportError:
import json as _json
user_agent_headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
# From https://stackoverflow.com/a/59128615
from types import FunctionType
from inspect import getmembers
def attributes(obj):
    """Return a dict of ``obj``'s public, non-method attributes and their values.

    Names starting with an underscore are skipped, as are names bound to
    plain functions on ``type(obj)``.
    """
    method_names = set()
    for member_name, member in getmembers(type(obj)):
        if isinstance(member, FunctionType):
            method_names.add(member_name)

    public = {}
    for attr in dir(obj):
        if attr[0] == '_' or attr in method_names:
            continue
        if hasattr(obj, attr):
            public[attr] = getattr(obj, attr)
    return public
def is_isin(string):
    """Return True if ``string`` has the shape of an ISIN.

    The expected shape is two upper-case letters, nine upper-case
    alphanumerics and one final digit. Only the format is checked, not the
    check digit. ``fullmatch`` is used so that nothing (not even a trailing
    newline) may follow the final digit.
    """
    return bool(_re.fullmatch(r"[A-Z]{2}[A-Z0-9]{9}[0-9]", string))
def get_all_by_isin(isin, proxy=None, session=None):
if not(is_isin(isin)):
if not (is_isin(isin)):
raise ValueError("Invalid ISIN number")
from .base import _BASE_URL_
@@ -85,7 +105,9 @@ def get_news_by_isin(isin, proxy=None, session=None):
return data.get('news', {})
def empty_df(index=[]):
def empty_df(index=None):
if index is None:
index = []
empty = _pd.DataFrame(index=index, data={
'Open': _np.nan, 'High': _np.nan, 'Low': _np.nan,
'Close': _np.nan, 'Adj Close': _np.nan, 'Volume': _np.nan})
@@ -100,48 +122,192 @@ def empty_earnings_dates_df():
return empty
def get_html(url, proxy=None, session=None):
    """Fetch ``url`` and return the response body as text.

    Uses ``session`` when one is supplied, otherwise the requests module
    itself; the module's default user-agent headers are always sent.
    """
    client = session if session else _requests
    response = client.get(url=url, proxies=proxy, headers=user_agent_headers)
    return response.text
def build_template(data):
    """Flatten a "FinancialTemplateStore" tree into display-ordered lists.

    build_template returns the details required to rebuild any of the yahoo
    finance financial statements in the same order as the yahoo finance
    webpage. It is built to be used on the "FinancialTemplateStore" json which
    appears in any one of the three yahoo finance webpages: "/financials",
    "/cash-flow" and "/balance-sheet".

    ``data['template']`` is a list of nodes; each node has a 'key' and may
    have 'children' (a list of nodes of the same form). Nodes are visited
    parent-first, children in listed order, to any nesting depth.

    Returns a 4-tuple, in this order:
    - template_ttm_order: The order that TTM (Trailing Twelve Month) figures
      should be listed in (keys prefixed with 'trailing').
    - template_annual_order: The order that annual figures should be listed
      in (keys prefixed with 'annual').
    - template_order: The un-prefixed keys (quarterlies have no prefix, hence
      why this is required).
    - level_detail: The nesting level of each line item. E.g. for the
      "/financials" webpage, "Total Revenue" is a level 0 item and is the
      summation of "Operating Revenue" and "Excise Taxes" which are level 1
      items.
    """
    template_ttm_order = []
    template_annual_order = []
    template_order = []
    level_detail = []

    def _walk(nodes, level):
        # Pre-order traversal: record the node, then descend into its children.
        for node in nodes:
            name = node['key']
            template_ttm_order.append('trailing{}'.format(name))
            template_annual_order.append('annual{}'.format(name))
            template_order.append('{}'.format(name))
            level_detail.append(level)
            if 'children' in node:
                _walk(node['children'], level + 1)

    _walk(data['template'], 0)
    return template_ttm_order, template_annual_order, template_order, level_detail
def get_json(url, proxy=None, session=None):
session = session or _requests
html = session.get(url=url, proxies=proxy, headers=user_agent_headers).text
def retreive_financial_details(data):
'''
retreive_financial_details returns all of the available financial details under the "QuoteTimeSeriesStore" for any of the following three yahoo finance webpages: "/financials", "/cash-flow" and "/balance-sheet".
if "QuoteSummaryStore" not in html:
html = session.get(url=url, proxies=proxy).text
if "QuoteSummaryStore" not in html:
return {}
json_str = html.split('root.App.main =')[1].split(
'(this)')[0].split(';\n}')[0].strip()
data = _json.loads(json_str)[
'context']['dispatcher']['stores']['QuoteSummaryStore']
# add data about Shares Outstanding for companies' tickers if they are available
try:
data['annualBasicAverageShares'] = _json.loads(
json_str)['context']['dispatcher']['stores'][
'QuoteTimeSeriesStore']['timeSeries']['annualBasicAverageShares']
except Exception:
pass
# return data
new_data = _json.dumps(data).replace('{}', 'null')
new_data = _re.sub(
r'\{[\'|\"]raw[\'|\"]:(.*?),(.*?)\}', r'\1', new_data)
return _json.loads(new_data)
Returns:
- TTM_dicts: A dictionary full of all of the available Trailing Twelve Month figures, this can easily be converted to a pandas dataframe.
- Annual_dicts: A dictionary full of all of the available Annual figures, this can easily be converted to a pandas dataframe.
'''
TTM_dicts = [] # Save a dictionary object to store the TTM financials.
Annual_dicts = [] # Save a dictionary object to store the Annual financials.
for key in data['timeSeries']: # Loop through the time series data to grab the key financial figures.
try:
if len(data['timeSeries'][key]) > 0:
time_series_dict = {}
time_series_dict['index'] = key
for each in data['timeSeries'][key]: # Loop through the years
if each == None:
continue
else:
time_series_dict[each['asOfDate']] = each['reportedValue']
# time_series_dict["{}".format(each['asOfDate'])] = data['timeSeries'][key][each]['reportedValue']
if 'trailing' in key:
TTM_dicts.append(time_series_dict)
elif 'annual' in key:
Annual_dicts.append(time_series_dict)
except Exception as e:
pass
return TTM_dicts, Annual_dicts
def camel2title(o):
return [_re.sub("([a-z])([A-Z])", r"\g<1> \g<2>", i).title() for i in o]
def format_annual_financial_statement(level_detail, annual_dicts, annual_order, ttm_dicts=None, ttm_order=None):
'''
format_annual_financial_statement formats any annual financial statement,
optionally merged with its trailing-twelve-month (TTM) figures.
Parameters:
- level_detail: values stored in a 'level_detail' column and then used as the second index level.
- annual_dicts: records for the annual figures; each record needs an "index" entry, which becomes the row label.
- annual_order: row labels (still carrying the 'annual' text) in the order the rows should appear.
- ttm_dicts / ttm_order: same as the annual pair but for TTM rows; TTM data is used only when both are neither None nor [].
Returns:
- _statement: A fully formatted annual financial statement in pandas dataframe, indexed by (row label, level_detail), columns sorted in descending order, rows that are entirely NaN removed.
'''
# One row per record, labelled by its "index" entry, then put in display order.
Annual = _pd.DataFrame.from_dict(annual_dicts).set_index("index")
Annual = Annual.reindex(annual_order)
# Remove the 'annual' text from the row labels.
Annual.index = Annual.index.str.replace(r'annual', '')
# Note: balance sheet is the only financial statement with no ttm detail
if (ttm_dicts not in [[], None]) and (ttm_order not in [[], None]):
TTM = _pd.DataFrame.from_dict(ttm_dicts).set_index("index")
TTM = TTM.reindex(ttm_order)
# Add 'TTM' prefix to all column names, so if combined we can tell
# the difference between actuals and TTM (similar to yahoo finance).
TTM.columns = ['TTM ' + str(col) for col in TTM.columns]
# Remove the 'trailing' text so the TTM row labels can line up with the annual ones.
TTM.index = TTM.index.str.replace(r'trailing', '')
# Join on row label; merge's default join keeps only labels present in both frames.
_statement = Annual.merge(TTM, left_index=True, right_index=True)
else:
_statement = Annual
# NOTE(review): `_statement.T.index` is the statement's column labels, not its
# row labels; format_quarterly_financial_statement passes `_statement.T` here
# instead - confirm which one is intended.
_statement.index = camel2title(_statement.T.index)
_statement['level_detail'] = level_detail
# Promote level_detail to a second index level alongside the row label.
_statement = _statement.set_index([_statement.index, 'level_detail'])
# Order the columns by descending label.
_statement = _statement[sorted(_statement.columns, reverse=True)]
_statement = _statement.dropna(how='all')
return _statement
def format_quarterly_financial_statement(_statement, level_detail, order):
    '''
    format_quarterly_financial_statements formats any quarterly financial statement

    Args:
        - _statement: dataframe whose index holds camelCase row labels and whose
          column labels are parseable as dates.
        - level_detail: value(s) stored in a 'level_detail' column, which becomes
          the second level of the returned index.
        - order: row labels in the order they should appear.

    Returns:
        - _statement: A fully formatted quarterly financial statement in pandas dataframe.
    '''
    # reindex() returns a new frame, so the caller's dataframe is left untouched.
    _statement = _statement.reindex(order)
    # Pass the row labels explicitly as a list of strings; handing the
    # transposed DataFrame to camel2title() only works with implementations
    # that do nothing but iterate their argument.
    _statement.index = camel2title(list(_statement.index))
    _statement['level_detail'] = level_detail
    _statement = _statement.set_index([_statement.index, 'level_detail'])
    # Newest period first (column labels sorted in descending order).
    _statement = _statement[sorted(_statement.columns, reverse=True)]
    _statement = _statement.dropna(how='all')
    _statement.columns = _pd.to_datetime(_statement.columns).date
    return _statement
def camel2title(strings: List[str], sep: str = ' ', acronyms: Optional[List[str]] = None) -> List[str]:
    """Convert camelCase strings to Title Case words joined by 'sep'.

    Args:
        strings: sequence of camelCase strings. An empty sequence is returned as-is.
        sep: single, non-alphanumeric separator inserted between words.
        acronyms: optional upper-case acronyms (e.g. "EBITDA") that must be kept
            upper-case and split from a following word. None or an empty
            collection means no acronym handling.

    Returns:
        List of converted strings, in input order.

    Raises:
        TypeError: if 'strings' or 'acronyms' is not an iterable of strings.
        ValueError: if 'sep' is not a single permitted character, or an acronym
            contains anything other than upper-case letters.
    """
    if isinstance(strings, str) or not hasattr(strings, '__iter__'):
        raise TypeError("camel2title() 'strings' argument must be iterable of strings")
    if len(strings) == 0:
        return strings
    if not isinstance(strings[0], str):
        raise TypeError("camel2title() 'strings' argument must be iterable of strings")
    if not isinstance(sep, str) or len(sep) != 1:
        raise ValueError(f"camel2title() 'sep' argument = '{sep}' must be single character")
    if _re.match("[a-zA-Z0-9]", sep):
        raise ValueError(f"camel2title() 'sep' argument = '{sep}' cannot be alpha-numeric")
    if _re.escape(sep) != sep and sep not in {' ', '-'}:
        # re.escape() also escapes ' ' and '-', which are harmless in the
        # replacement text, so those two are explicitly permitted.
        raise ValueError(f"camel2title() 'sep' argument = '{sep}' cannot be special character")

    if acronyms is not None:
        if isinstance(acronyms, str) or not hasattr(acronyms, '__iter__'):
            raise TypeError("camel2title() 'acronyms' argument must be iterable of strings")
        acronyms = list(acronyms)
        # Check every element (not just the first) and tolerate an empty list.
        if any(not isinstance(a, str) for a in acronyms):
            raise TypeError("camel2title() 'acronyms' argument must be iterable of strings")
        for a in acronyms:
            if not _re.match("^[A-Z]+$", a):
                raise ValueError(f"camel2title() 'acronyms' argument must only contain upper-case, but '{a}' detected")

    # Insert 'sep' between lower-then-upper-case
    rep = rf"\g<1>{sep}\g<2>"
    strings = [_re.sub("([a-z])([A-Z])", rep, s) for s in strings]

    if not acronyms:
        return [s.title() for s in strings]

    # Handling acronyms requires more care. Assumes Yahoo returns acronym strings upper-case.
    # Insert 'sep' after an acronym that is directly followed by a capitalised word.
    for a in acronyms:
        pat = f"({a})([A-Z][a-z])"
        strings = [_re.sub(pat, rep, s) for s in strings]

    # Apply str.title() to non-acronym words only
    keep_upper = set(acronyms)
    return [
        sep.join(word if word in keep_upper else word.title() for word in s.split(sep))
        for s in strings
    ]
def snake_case_2_camelCase(s):
    """Convert a snake_case string to camelCase.

    The text before the first underscore is kept unchanged; every following
    underscore-separated part is passed through ``str.title()`` and appended.
    """
    head, *tail = s.split('_')
    return head + ''.join(part.title() for part in tail)
def _parse_user_dt(dt, exchange_tz):
if isinstance(dt, int):
## Should already be epoch, test with conversion:
# Should already be epoch, test with conversion:
_datetime.datetime.fromtimestamp(dt)
else:
# Convert str/date -> datetime, set tzinfo=exchange, get timestamp:
@@ -156,7 +322,21 @@ def _parse_user_dt(dt, exchange_tz):
return dt
def _interval_to_timedelta(interval):
if interval == "1mo":
return _dateutil.relativedelta.relativedelta(months=1)
elif interval == "3mo":
return _dateutil.relativedelta.relativedelta(months=3)
elif interval == "1y":
return _dateutil.relativedelta.relativedelta(years=1)
elif interval == "1wk":
return _pd.Timedelta(days=7, unit='d')
else:
return _pd.Timedelta(interval)
def auto_adjust(data):
col_order = data.columns
df = data.copy()
ratio = df["Close"] / df["Adj Close"]
df["Adj Open"] = df["Open"] / ratio
@@ -172,13 +352,13 @@ def auto_adjust(data):
"Adj Low": "Low", "Adj Close": "Close"
}, inplace=True)
df = df[["Open", "High", "Low", "Close", "Volume"]]
return df[["Open", "High", "Low", "Close", "Volume"]]
return df[[c for c in col_order if c in df.columns]]
def back_adjust(data):
""" back-adjusted data to mimic true historical prices """
col_order = data.columns
df = data.copy()
ratio = df["Adj Close"] / df["Close"]
df["Adj Open"] = df["Open"] * ratio
@@ -194,7 +374,7 @@ def back_adjust(data):
"Adj Low": "Low"
}, inplace=True)
return df[["Open", "High", "Low", "Close", "Volume"]]
return df[[c for c in col_order if c in df.columns]]
def parse_quotes(data):
@@ -226,6 +406,8 @@ def parse_quotes(data):
def parse_actions(data):
dividends = _pd.DataFrame(
columns=["Dividends"], index=_pd.DatetimeIndex([]))
capital_gains = _pd.DataFrame(
columns=["Capital Gains"], index=_pd.DatetimeIndex([]))
splits = _pd.DataFrame(
columns=["Stock Splits"], index=_pd.DatetimeIndex([]))
@@ -236,9 +418,16 @@ def parse_actions(data):
dividends.set_index("date", inplace=True)
dividends.index = _pd.to_datetime(dividends.index, unit="s")
dividends.sort_index(inplace=True)
dividends.columns = ["Dividends"]
if "capitalGains" in data["events"]:
capital_gains = _pd.DataFrame(
data=list(data["events"]["capitalGains"].values()))
capital_gains.set_index("date", inplace=True)
capital_gains.index = _pd.to_datetime(capital_gains.index, unit="s")
capital_gains.sort_index(inplace=True)
capital_gains.columns = ["Capital Gains"]
if "splits" in data["events"]:
splits = _pd.DataFrame(
data=list(data["events"]["splits"].values()))
@@ -246,25 +435,326 @@ def parse_actions(data):
splits.index = _pd.to_datetime(splits.index, unit="s")
splits.sort_index(inplace=True)
splits["Stock Splits"] = splits["numerator"] / \
splits["denominator"]
splits = splits["Stock Splits"]
splits["denominator"]
splits = splits[["Stock Splits"]]
return dividends, splits
return dividends, splits, capital_gains
def set_df_tz(df, interval, tz):
    """Convert the index of 'df' to timezone 'tz' and return 'df'.

    A timezone-naive index is first localized as UTC. The dataframe is modified
    in place (its index is replaced). 'interval' is accepted but not used.
    """
    index = df.index
    if index.tz is None:
        index = index.tz_localize("UTC")
    df.index = index.tz_convert(tz)
    return df
def fix_Yahoo_returning_prepost_unrequested(quotes, interval, metadata):
    """Drop rows of 'quotes' that fall outside regular trading hours.

    Sometimes Yahoo returns post-market data despite not requesting it
    (normally happens on half-day early closes), and sometimes pre-market data
    (e.g. some London tickers).

    Each quote row is matched by calendar date to a row of
    metadata["tradingPeriods"], a dataframe with "start" and "end" columns.
    Rows with timestamp < "start" or >= "end" are removed; "end" is the end of
    regular trading hours (including any auction).

    Neither 'quotes' nor 'metadata' is modified; a new dataframe is returned.
    'interval' is accepted but not used.
    """
    # Work on copies so the temporary "_date" join key never leaks into the
    # caller's objects.
    tps_df = metadata["tradingPeriods"].copy()
    tps_df["_date"] = tps_df.index.date
    dated_quotes = quotes.assign(_date=quotes.index.date)

    # merge() discards the left index, so restore it afterwards. A left merge
    # keeps the row order of the left frame.
    merged = dated_quotes.merge(tps_df, how="left", validate="many_to_one")
    merged.index = quotes.index.copy()

    f_drop = merged.index >= merged["end"]
    f_drop = f_drop | (merged.index < merged["start"])
    if f_drop.any():
        merged = merged[~f_drop]

    return merged.drop(["_date", "start", "end"], axis=1)
def fix_Yahoo_returning_live_separate(quotes, interval, tz_exchange):
    """Merge Yahoo's separate "live" last row into the interval it belongs to.

    Yahoo bug fix. If market is open today then Yahoo normally returns
    todays data as a separate row from rest-of week/month interval in above row.
    Seems to depend on what exchange e.g. crypto OK.
    Fix = merge them together.

    Args:
        quotes: price dataframe with a DatetimeIndex and Open/High/Low/Close/Volume
            columns (optionally "Adj High", "Adj Low", "Adj Close").
        interval: interval string, e.g. "1d", "1wk", "1mo", "3mo", "1h".
        tz_exchange: timezone used to decide which day/week/month a row falls in.
            A timezone-naive index is treated as UTC.

    Returns:
        Dataframe with at most one row removed. When two rows are merged, the
        second-to-last row of the passed-in dataframe is updated in place
        before the last row is dropped.
    """
    n = quotes.shape[0]
    if n <= 1:
        return quotes

    dt1 = quotes.index[n - 1]
    dt2 = quotes.index[n - 2]
    if quotes.index.tz is None:
        dt1 = dt1.tz_localize("UTC")
        dt2 = dt2.tz_localize("UTC")
    dt1 = dt1.tz_convert(tz_exchange)
    dt2 = dt2.tz_convert(tz_exchange)

    if interval == "1d":
        # Similar bug in daily data except most data is simply duplicated
        # - exception is volume, *slightly* greater on final row (and matches website)
        if dt1.date() == dt2.date():
            # Last two rows are on same day. Drop second-to-last row
            quotes = quotes.drop(quotes.index[n - 2])
        return quotes

    if interval == "1wk":
        last_rows_same_interval = dt1.year == dt2.year and dt1.week == dt2.week
    elif interval == "1mo":
        # Compare the year too, so the same month of different years never matches.
        last_rows_same_interval = dt1.year == dt2.year and dt1.month == dt2.month
    elif interval == "3mo":
        last_rows_same_interval = dt1.year == dt2.year and dt1.quarter == dt2.quarter
    else:
        last_rows_same_interval = (dt1 - dt2) < _pd.Timedelta(interval)
    if not last_rows_same_interval:
        return quotes

    # Last two rows are within same interval: fold the last row into the previous one.
    idx2 = quotes.index[n - 2]

    def _cell(col, pos):
        # Explicit positional access; integer keys on a Series with a
        # DatetimeIndex must not be interpreted as labels.
        return quotes[col].iloc[pos]

    if _np.isnan(quotes.loc[idx2, "Open"]):
        quotes.loc[idx2, "Open"] = _cell("Open", n - 1)
    # Note: nanmax() & nanmin() ignores NaNs
    quotes.loc[idx2, "High"] = _np.nanmax([_cell("High", n - 1), _cell("High", n - 2)])
    quotes.loc[idx2, "Low"] = _np.nanmin([_cell("Low", n - 1), _cell("Low", n - 2)])
    quotes.loc[idx2, "Close"] = _cell("Close", n - 1)
    if "Adj High" in quotes.columns:
        quotes.loc[idx2, "Adj High"] = _np.nanmax([_cell("Adj High", n - 1), _cell("Adj High", n - 2)])
    if "Adj Low" in quotes.columns:
        quotes.loc[idx2, "Adj Low"] = _np.nanmin([_cell("Adj Low", n - 1), _cell("Adj Low", n - 2)])
    if "Adj Close" in quotes.columns:
        quotes.loc[idx2, "Adj Close"] = _cell("Adj Close", n - 1)
    quotes.loc[idx2, "Volume"] += _cell("Volume", n - 1)
    return quotes.drop(quotes.index[n - 1])
def safe_merge_dfs(df_main, df_sub, interval):
    """Carefully merge the single event column of 'df_sub' onto 'df_main'.

    If the naive join loses events, try again with 'df_sub' reindexed:
    1) if interval is weekly, monthly or quarterly, set its index to the start
       of the week/month/quarter;
    2) if still failing, manually search 'df_main.index' for the interval that
       contains each event date.
    For intraday and daily data, events that still cannot be aligned are
    appended as extra rows with NaN prices.

    Args:
        df_main: price dataframe indexed by interval start.
        df_sub: event dataframe contributing exactly one column that is not in
            'df_main' ("Dividends", "Capital Gains" or "Stock Splits"). It is
            not modified.
        interval: interval string of 'df_main', e.g. "1d", "1wk", "1mo", "1h".

    Returns:
        'df_main' joined with the event column.

    Raises:
        Exception: if 'df_sub' is empty, does not contribute exactly one new
            column, needs aggregating with an unknown rule, or events cannot
            be aligned for a weekly-or-longer interval.
    """
    if df_sub.shape[0] == 0:
        raise Exception("No data to merge")

    df_sub_backup = df_sub.copy()
    data_cols = [c for c in df_sub.columns if c not in df_main]
    if len(data_cols) != 1:
        raise Exception("Expected 1 data col")
    data_col = data_cols[0]

    def _reindex_events(df, new_index, data_col_name):
        # Work on a copy so the caller's dataframe keeps its original index.
        df = df.copy()
        if len(new_index) == len(set(new_index)):
            # No duplicates, easy
            df.index = new_index
            return df

        df["_NewIndex"] = new_index
        # Duplicates present within periods but can aggregate
        if data_col_name in ("Dividends", "Capital Gains"):
            # Cash amounts within one period: add
            df = df.groupby("_NewIndex").sum()
        elif data_col_name == "Stock Splits":
            # Split ratios within one period: product
            df = df.groupby("_NewIndex").prod()
        else:
            raise Exception("New index contains duplicates but unsure how to aggregate for '{}'".format(data_col_name))
        df.index.name = None
        if "_NewIndex" in df.columns:
            df = df.drop("_NewIndex", axis=1)
        return df

    def _join_and_check(sub):
        # Data is "lost" when fewer events survive the join than were supplied.
        joined = df_main.join(sub)
        lost = joined[data_col].notna().sum() < sub.shape[0]
        return joined, lost

    df, data_lost = _join_and_check(df_sub)
    if not data_lost:
        return df

    # Lost data during join()
    # Backdate all df_sub.index dates to start of week/month/quarter
    period_freq = {"1wk": 'W', "1mo": 'M', "3mo": 'Q'}.get(interval)
    if period_freq is not None:
        new_index = _pd.PeriodIndex(df_sub.index, freq=period_freq).to_timestamp()
        new_index = new_index.tz_localize(df.index.tz, ambiguous=True, nonexistent='shift_forward')
        df_sub = _reindex_events(df_sub, new_index, data_col)
        df, data_lost = _join_and_check(df_sub)
        if not data_lost:
            return df

    # Lost data during join(). Manually check each df_sub.index date against df_main.index to
    # find matching interval
    df_sub = df_sub_backup.copy()
    new_index = [-1] * df_sub.shape[0]
    intraday = interval.endswith('h') or interval.endswith('m')
    for i in range(df_sub.shape[0]):
        dt_sub_i = df_sub.index[i]
        if dt_sub_i in df_main.index:
            new_index[i] = dt_sub_i
            continue

        # Found a bad index date, need to search for near-match in df_main (same week/month)
        fixed = False
        for j in range(df_main.shape[0] - 1):
            dt_main_j0 = df_main.index[j]
            dt_main_j1 = df_main.index[j + 1]
            if (dt_main_j0 <= dt_sub_i) and (dt_sub_i < dt_main_j1):
                fixed = True
                if intraday:
                    # Must also be same day
                    fixed = (dt_main_j0.date() == dt_sub_i.date()) and (dt_sub_i.date() == dt_main_j1.date())
                if fixed:
                    dt_sub_i = dt_main_j0
                break

        if not fixed:
            # Not inside any [row, next row) span: see if it belongs to the final row.
            last_main_dt = df_main.index[df_main.shape[0] - 1]
            if interval == "1mo" and last_main_dt.month == dt_sub_i.month:
                dt_sub_i = last_main_dt
                fixed = True
            elif interval == "3mo" and last_main_dt.year == dt_sub_i.year and last_main_dt.quarter == dt_sub_i.quarter:
                dt_sub_i = last_main_dt
                fixed = True
            elif interval == "1wk":
                if last_main_dt.week == dt_sub_i.week:
                    dt_sub_i = last_main_dt
                    fixed = True
                elif (dt_sub_i >= last_main_dt) and (dt_sub_i - last_main_dt < _datetime.timedelta(weeks=1)):
                    # With some specific start dates (e.g. around early Jan), Yahoo
                    # messes up start-of-week, is Saturday not Monday. So check
                    # if same week another way
                    dt_sub_i = last_main_dt
                    fixed = True
            elif interval == "1d" and last_main_dt.day == dt_sub_i.day:
                dt_sub_i = last_main_dt
                fixed = True
            elif interval == "1h" and last_main_dt.hour == dt_sub_i.hour:
                dt_sub_i = last_main_dt
                fixed = True
            elif intraday:
                td = _pd.to_timedelta(interval)
                if (dt_sub_i >= last_main_dt) and (dt_sub_i - last_main_dt < td):
                    dt_sub_i = last_main_dt
                    fixed = True
        new_index[i] = dt_sub_i

    df_sub = _reindex_events(df_sub, new_index, data_col)
    df, data_lost = _join_and_check(df_sub)
    if data_lost:
        # Not always possible to match events with trading, e.g. when released pre-market.
        # So have to append to bottom with nan prices.
        # But should only be impossible with intra-day price data.
        if intraday or interval == "1d":
            # Update: is possible with daily data when dividend very recent
            f_missing = ~df_sub.index.isin(df.index)
            df_sub_missing = df_sub[f_missing].copy()
            keys = {"Adj Open", "Open", "Adj High", "High", "Adj Low", "Low", "Adj Close",
                    "Close"}.intersection(df.columns)
            df_sub_missing[list(keys)] = _np.nan
            col_ordering = df.columns
            df = _pd.concat([df, df_sub_missing], sort=True)[col_ordering]
        else:
            raise Exception("Lost data during merge despite all attempts to align data (see above)")

    return df
def fix_Yahoo_dst_issue(df, interval):
    """Shift daily/weekly timestamps that Yahoo reports just before midnight.

    These intervals should start at time 00:00. But for some combinations of
    date and timezone, Yahoo has time off by few hours (e.g. Brazil 23:00
    around Jan-2022). Suspect DST problem. The clue is (a) minutes=0 and
    (b) hour near 0. Obviously Yahoo meant 00:00, so rows stamped 22:00 or
    23:00 are moved forward to the following midnight so this doesn't affect
    date conversion.

    The index of 'df' is replaced in place and 'df' is returned. Other
    intervals are returned untouched.
    """
    if interval in ["1d", "1w", "1wk"]:
        f_pre_midnight = (df.index.minute == 0) & (df.index.hour.isin([22, 23]))
        dst_error_hours = _np.zeros(df.shape[0], dtype=int)
        dst_error_hours[f_pre_midnight] = 24 - df.index[f_pre_midnight].hour
        df.index += _pd.to_timedelta(dst_error_hours, unit='h')
    return df
def is_valid_timezone(tz: str) -> bool:
    """Return True if ``_tz.timezone`` accepts 'tz', False if it raises UnknownTimeZoneError."""
    try:
        _tz.timezone(tz)
    except UnknownTimeZoneError:
        return False
    else:
        return True
def format_history_metadata(md):
    """Convert the epoch timestamps in a price-history metadata dict to exchange-local datetimes.

    The dict is modified in place and returned:
    - "firstTradeDate" / "regularMarketTime": epoch seconds -> Timestamp in the
      timezone named by md["exchangeTimezoneName"] (None values are left alone).
    - "currentTradingPeriod": "start"/"end" of each of "regular", "pre", "post"
      are converted, and the "gmtoffset"/"timezone" entries are removed.
    - "tradingPeriods": removed if it is {"pre": [], "post": []}; otherwise
      replaced by a dataframe indexed by "Date" with "start"/"end" columns (plus
      "pre_start"/"pre_end"/"post_start"/"post_end" when pre- and post-market
      data is present).

    Anything that is not a non-empty dict is returned unchanged.

    Raises:
        Exception: if "tradingPeriods" is neither a list nor a dict.
    """
    if not isinstance(md, dict) or len(md) == 0:
        return md

    tz = md["exchangeTimezoneName"]

    def _to_exchange_time(epoch_seconds):
        return _pd.to_datetime(epoch_seconds, unit='s', utc=True).tz_convert(tz)

    for k in ["firstTradeDate", "regularMarketTime"]:
        if md.get(k) is not None:
            md[k] = _to_exchange_time(md[k])

    if "currentTradingPeriod" in md:
        for m in ["regular", "pre", "post"]:
            period = md["currentTradingPeriod"].get(m)
            if period is None:
                continue
            for t in ["start", "end"]:
                period[t] = _to_exchange_time(period[t])
            # pop() rather than del: a missing key must not raise.
            period.pop("gmtoffset", None)
            period.pop("timezone", None)

    if "tradingPeriods" in md:
        if md["tradingPeriods"] == {"pre": [], "post": []}:
            del md["tradingPeriods"]

    if "tradingPeriods" in md:
        tps = md["tradingPeriods"]
        # Each day's entry is a list; only its first element is used.
        if isinstance(tps, list):
            # Only regular times
            regs_dict = [day[0] for day in tps]
            pres_dict = None
            posts_dict = None
        elif isinstance(tps, dict):
            # Includes pre- and post-market
            pres_dict = [day[0] for day in tps["pre"]]
            posts_dict = [day[0] for day in tps["post"]]
            regs_dict = [day[0] for day in tps["regular"]]
        else:
            raise Exception(f"Unexpected type for metadata 'tradingPeriods': {type(tps)}")

        def _dict_to_table(d):
            df = _pd.DataFrame.from_dict(d).drop(["timezone", "gmtoffset"], axis=1, errors="ignore")
            df["end"] = _pd.to_datetime(df["end"], unit='s', utc=True).dt.tz_convert(tz)
            df["start"] = _pd.to_datetime(df["start"], unit='s', utc=True).dt.tz_convert(tz)
            # Index each row by the exchange-local midnight of its trading day.
            df.index = _pd.to_datetime(df["start"].dt.date)
            df.index = df.index.tz_localize(tz)
            return df

        df = _dict_to_table(regs_dict)
        df_cols = ["start", "end"]
        if pres_dict is not None:
            pre_df = _dict_to_table(pres_dict)
            df = df.merge(pre_df.rename(columns={"start": "pre_start", "end": "pre_end"}),
                          left_index=True, right_index=True)
            df_cols = ["pre_start", "pre_end"] + df_cols
        if posts_dict is not None:
            post_df = _dict_to_table(posts_dict)
            df = df.merge(post_df.rename(columns={"start": "post_start", "end": "post_end"}),
                          left_index=True, right_index=True)
            df_cols = df_cols + ["post_start", "post_end"]
        df = df[df_cols]
        df.index.name = "Date"

        md["tradingPeriods"] = df

    return md
class ProgressBar:
def __init__(self, iterations, text='completed'):
self.text = text
@@ -305,54 +795,176 @@ class ProgressBar:
all_full = self.width - 2
num_hashes = int(round((percent_done / 100.0) * all_full))
self.prog_bar = '[' + self.fill_char * \
num_hashes + ' ' * (all_full - num_hashes) + ']'
num_hashes + ' ' * (all_full - num_hashes) + ']'
pct_place = (len(self.prog_bar) // 2) - len(str(percent_done))
pct_string = '%d%%' % percent_done
self.prog_bar = self.prog_bar[0:pct_place] + \
(pct_string + self.prog_bar[pct_place + len(pct_string):])
(pct_string + self.prog_bar[pct_place + len(pct_string):])
def __str__(self):
return str(self.prog_bar)
# Simple file cache of ticker->timezone:
_cache_dp = None
def get_cache_dirpath():
    """Return the path of the "py-yfinance" cache folder.

    The folder sits under the module-level ``_cache_dp`` when that is set,
    otherwise under ``_ad.user_cache_dir()``.
    """
    base_dir = _ad.user_cache_dir() if _cache_dp is None else _cache_dp
    return _os.path.join(base_dir, "py-yfinance")
def set_tz_cache_location(dp):
    """Store 'dp' in the module-level ``_cache_dp`` (the parent folder used for the cache)."""
    global _cache_dp
    _cache_dp = dp
# ---------------------------------
# TimeZone cache related code
# ---------------------------------
def cache_lookup_tkr_tz(tkr):
fp = _os.path.join(get_cache_dirpath(), "tkr-tz.csv")
if not _os.path.isfile(fp):
class _KVStore:
"""Simpel Sqlite backed key/value store, key and value are strings. Should be thread safe."""
def __init__(self, filename):
self._cache_mutex = Lock()
with self._cache_mutex:
self.conn = _sqlite3.connect(filename, timeout=10, check_same_thread=False)
self.conn.execute('pragma journal_mode=wal')
try:
self.conn.execute('create table if not exists "kv" (key TEXT primary key, value TEXT) without rowid')
except Exception as e:
if 'near "without": syntax error' in str(e):
# "without rowid" requires sqlite 3.8.2. Older versions will raise exception
self.conn.execute('create table if not exists "kv" (key TEXT primary key, value TEXT)')
else:
raise
self.conn.commit()
_atexit.register(self.close)
def close(self):
if self.conn is not None:
with self._cache_mutex:
self.conn.close()
self.conn = None
def get(self, key: str) -> Union[str, None]:
"""Get value for key if it exists else returns None"""
item = self.conn.execute('select value from "kv" where key=?', (key,))
if item:
return next(item, (None,))[0]
def set(self, key: str, value: str) -> None:
with self._cache_mutex:
self.conn.execute('replace into "kv" (key, value) values (?,?)', (key, value))
self.conn.commit()
def bulk_set(self, kvdata: Dict[str, str]):
records = tuple(i for i in kvdata.items())
with self._cache_mutex:
self.conn.executemany('replace into "kv" (key, value) values (?,?)', records)
self.conn.commit()
def delete(self, key: str):
with self._cache_mutex:
self.conn.execute('delete from "kv" where key=?', (key,))
self.conn.commit()
class _TzCacheException(Exception):
pass
class _TzCache:
    """Ticker -> timezone cache persisted in a SQLite file ("tkr-tz.db")."""

    def __init__(self):
        self._setup_cache_folder()
        # Original note: the database must be initialised here, where it is thread-safe.
        self._tz_db = _KVStore(_os.path.join(self._db_dir, "tkr-tz.db"))
        self._migrate_cache_tkr_tz()

    def _setup_cache_folder(self):
        """Create the cache folder if missing; otherwise verify it is readable and writable."""
        folder = self._db_dir
        if not _os.path.isdir(folder):
            try:
                _os.makedirs(folder)
            except OSError as err:
                raise _TzCacheException("Error creating TzCache folder: '{}' reason: {}"
                                        .format(folder, err))
            return

        accessible = _os.access(folder, _os.R_OK) and _os.access(folder, _os.W_OK)
        if not accessible:
            raise _TzCacheException("Cannot read and write in TzCache folder: '{}'"
                                    .format(folder))

    def lookup(self, tkr):
        """Return the stored timezone for 'tkr', or None if there is none."""
        return self.tz_db.get(tkr)

    def store(self, tkr, tz):
        """Store 'tz' for 'tkr'; a 'tz' of None deletes the entry.

        Raises Exception if a timezone is already stored for 'tkr'.
        """
        if tz is None:
            self.tz_db.delete(tkr)
            return
        if self.tz_db.get(tkr) is not None:
            raise Exception("Tkr {} tz already in cache".format(tkr))
        self.tz_db.set(tkr, tz)

    @property
    def _db_dir(self):
        # Folder "py-yfinance" beneath the module-level cache directory.
        return _os.path.join(_cache_dir, "py-yfinance")

    @property
    def tz_db(self):
        return self._tz_db

    def _migrate_cache_tkr_tz(self):
        """Migrate contents from old ticker CSV-cache to SQLite db"""
        legacy_csv = _os.path.join(self._db_dir, "tkr-tz.csv")
        if not _os.path.isfile(legacy_csv):
            return None

        try:
            df = _pd.read_csv(legacy_csv, index_col="Ticker")
        except _pd.errors.EmptyDataError:
            # Nothing to migrate from an empty file.
            df = None
        if df is not None:
            self.tz_db.bulk_set(df.to_dict()['Tz'])
        # Reached only when the read was empty or the import succeeded.
        _os.remove(legacy_csv)
class _TzCacheDummy:
"""Dummy cache to use if tz cache is disabled"""
def lookup(self, tkr):
return None
df = _pd.read_csv(fp)
f = df["Ticker"] == tkr
if sum(f) == 0:
def store(self, tkr, tz):
pass
@property
def tz_db(self):
return None
return df["Tz"][f].iloc[0]
def cache_store_tkr_tz(tkr,tz):
df = _pd.DataFrame({"Ticker":[tkr], "Tz":[tz]})
dp = get_cache_dirpath()
if not _os.path.isdir(dp):
_os.makedirs(dp)
fp = _os.path.join(dp, "tkr-tz.csv")
if not _os.path.isfile(fp):
df.to_csv(fp, index=False)
return
def get_tz_cache():
"""
Get the timezone cache, initializes it and creates cache folder if needed on first call.
If folder cannot be created for some reason it will fall back to initialize a
dummy cache with same interface as real cache.
"""
# as this can be called from multiple threads, protect it.
with _cache_init_lock:
global _tz_cache
if _tz_cache is None:
try:
_tz_cache = _TzCache()
except _TzCacheException as err:
print("Failed to create TzCache, reason: {}".format(err))
print("TzCache will not be used.")
print("Tip: You can direct cache to use a different location with 'set_tz_cache_location(mylocation)'")
_tz_cache = _TzCacheDummy()
df_all = _pd.read_csv(fp)
f = df_all["Ticker"]==tkr
if sum(f) > 0:
raise Exception("Tkr {} tz already in cache".format(tkr))
return _tz_cache
_pd.concat([df_all,df]).to_csv(fp, index=False)
_cache_dir = _ad.user_cache_dir()
_cache_init_lock = Lock()
_tz_cache = None
def set_tz_cache_location(cache_dir: str):
    """
    Sets the path to create the "py-yfinance" cache folder in.
    Useful if the default folder returned by "appdir.user_cache_dir()" is not writable.
    Must be called before cache is used (that is, before fetching tickers).
    :param cache_dir: Path to use for caches
    :return: None
    :raises AssertionError: if the timezone cache has already been created
    """
    global _cache_dir, _tz_cache
    # Raised explicitly instead of using `assert`, so the check still runs when
    # Python is started with -O; the exception type is unchanged.
    if _tz_cache is not None:
        raise AssertionError(
            "Time Zone cache already initialized, setting path must be done before cache is created")
    _cache_dir = cache_dir

View File

@@ -1 +1 @@
version = "0.1.81"
version = "0.2.12"