Compare commits

...

134 Commits

Author SHA1 Message Date
ValueRaider
a1bcb4c351 Version 0.2.38 2024-04-16 22:18:37 +01:00
ValueRaider
18089f451b Merge pull request #1908 from vittoboa/holders_404_error
Fix 404 Error for Holders
2024-04-16 22:14:33 +01:00
vittoboa
3d180fcf2c Move ticker symbol from parameter to URL 2024-04-16 22:37:07 +02:00
Value Raider
c2d568367c Version 0.2.37 2024-02-25 13:25:28 +00:00
ValueRaider
d3728d3071 Merge pull request #1869 from ranaroussi/dev
Dev
2024-02-24 23:09:34 +00:00
ValueRaider
915bb1a080 Merge pull request #1866 from ranaroussi/fix/price-repair-confusing-order
Price repair bug-fix
2024-02-24 22:58:20 +00:00
Value Raider
d55c317158 Fix bug: prices order flipping during repair, introducing potential data corruption 2024-02-19 22:17:20 +00:00
ValueRaider
ac1d09049e Merge pull request #1865 from cottrell/fix
Fix some errors.
2024-02-19 22:11:48 +00:00
David Cottrell
afb4e0d5dc Fix some errors. 2024-02-19 21:43:12 +00:00
ValueRaider
1d31e7ca01 Update issue form - more emphasis on following instructions 2024-02-11 13:47:36 +00:00
ValueRaider
683064f9ad Merge pull request #1849 from ranaroussi/refactor/price-history 2024-02-07 23:11:51 +00:00
Value Raider
cdf897f9e6 Move price history+repair logic into new file 2024-02-04 13:09:37 +00:00
ValueRaider
eab6c8dfa7 Update bug_report.yaml because people can't read 2024-02-01 21:28:38 +00:00
ValueRaider
97f93d35ed Merge pull request #1844 from power-edge/dev
adding upgrade for pandas deprecation warning, adding pyarrow>=0.17.0…
2024-01-31 21:51:05 +00:00
Nikolaus Schuetz
5aef8addab removing dev requirements (they are included by extras) 2024-01-29 17:43:12 -05:00
ValueRaider
6b8a4a5608 Merge pull request #1841 from Rogach/pr/dont-disable-global-logging
do not disable app-wide logging in quote.py (fixes #1829)
2024-01-28 16:29:43 +00:00
Platon Pronko
212a7987c3 do not disable app-wide logging in quote.py (fixes #1829) 2024-01-28 20:43:50 +05:00
Nikolaus Schuetz
58a0a57457 adding upgrade for pandas deprecation warning, adding pyarrow>=0.17.0 at minimum requirement as defined in dev requirements for pandas==1.3.0 version 2024-01-26 20:12:58 -05:00
ValueRaider
75297c0eba Merge pull request #1838 from mreiche/bugfix/remove-empty-series
Remove _empty_series leftovers
2024-01-23 19:07:16 +00:00
Mike Reiche
1dc2719368 Remove _empty_series leftovers 2024-01-23 15:32:56 +01:00
Value Raider
ab979e9141 Version 0.2.36 2024-01-21 18:10:41 +00:00
ValueRaider
b837c1ec2a Merge pull request #1834 from ranaroussi/dev
sync dev -> main
2024-01-21 18:08:04 +00:00
ValueRaider
2630c66cd1 Merge pull request #1833 from ange-daumal/json-fix
Fix JSON error handling
2024-01-19 21:56:42 +00:00
ValueRaider
7af789fe9a Merge pull request #1830 from ange-daumal/patch-1
Fix JSON error handling
2024-01-19 21:51:52 +00:00
ValueRaider
73e36688b7 Merge pull request #1827 from ranaroussi/fix/peewee-with-old-sqlite
Handle peewee with old sqlite
2024-01-19 21:51:31 +00:00
ValueRaider
f1264716fc Merge pull request #1824 from ranaroussi/fix/price-keepna-with-repair
Fix history() keepna=False with repair=True
2024-01-19 21:51:10 +00:00
Ange Daumal
06fd35121a Fix JSON access to prevent KeyError 2024-01-19 22:51:02 +01:00
Mike Reiche
91f468e4d3 Fix JSON access to prevent KeyError 2024-01-19 22:42:42 +01:00
ValueRaider
d00c1a976c Merge pull request #1831 from ranaroussi/main
sync main -> dev
2024-01-15 19:28:56 +00:00
ValueRaider
176c3d628b Update ci.yml to Node16 2024-01-15 19:27:37 +00:00
ValueRaider
8f53af1593 Merge pull request #1823 from molpcs/patch-2
Update README.md for better copy-ability
2024-01-14 12:36:17 +00:00
ValueRaider
19188d52d4 Merge pull request #1795 from amanlai/main
explicitly name the column levels
2024-01-14 10:45:04 +00:00
Value Raider
ffaf200562 Handle peewee with old sqlite 2024-01-13 23:00:59 +00:00
Value Raider
6686258e66 Fix history() keepna=False with repair=True 2024-01-13 13:19:44 +00:00
molpcs
47bc46c804 Update README.md
Wrap yfinance[optional] code snippet with quotes to avoid conflict with zsh globbing. Remains compatible with bash.
2024-01-12 11:57:58 -08:00
ValueRaider
f563e51509 Merge pull request #1822 from akshayparopkari/patch-1 2024-01-11 09:30:22 +00:00
Akshay Paropkari
c5404bcd9d Update fundamentals.py
Error in supplying timescale values resulted in misleading ValueError - 

```
ValueError: Illegal argument: timescale must be one of: ['income', 'balance-sheet', 'cash-flow']
```
2024-01-11 05:17:39 +00:00
ValueRaider
006e0a155b Merge pull request #1724 from mreiche/bugfix/data-types-2
Bugfix/data types 2
2024-01-09 20:13:41 +00:00
Mike Reiche
dbc55e5596 Remove unused List import 2024-01-09 21:08:46 +01:00
Mike Reiche
4ce63fe8ca Merge remote-tracking branch 'yfinance/dev' into bugfix/data-types-2 2024-01-09 08:51:33 +01:00
Mike Reiche
223f5337a8 Remove empty static series 2024-01-09 08:50:31 +01:00
Mike Reiche
4c34487149 Revert disabling earnings test 2024-01-09 08:50:00 +01:00
Mike Reiche
ac8a917288 Revert adding explicit requirements 2024-01-09 08:43:54 +01:00
Mike Reiche
15321bd097 Merge remote-tracking branch 'yfinance/main' into bugfix/data-types-2 2024-01-09 08:42:43 +01:00
ValueRaider
10961905b6 Merge pull request #1817 from ranaroussi/main
sync main -> dev
2024-01-07 18:39:10 +00:00
ValueRaider
acbd2a8d78 Merge pull request #1816 from ranaroussi/fix/ticker-api
0.2.34 fixes: Add new data to README, remove deprecated stuff, fix tests
2024-01-07 00:35:06 +00:00
Value Raider
61c4696c65 Add new data to README, remove deprecated stuff, fix tests, v0.2.35
Ticker.recommendations*:
- add to README
- organise their unit tests
- remove redundant recommendations_history

Remove deprecated arguments from Ticker.history

Fix 'bad symbol' behaviour & tests
Fix some prices tests

Bump version 0.2.35
2024-01-07 00:33:59 +00:00
Value Raider
a7c41afa52 Version 0.2.34 2024-01-06 17:19:51 +00:00
ValueRaider
49d8dfd544 Merge pull request #1815 from ranaroussi/dev
sync dev -> main
2024-01-06 16:18:20 +00:00
ValueRaider
477dc6e6c4 Merge pull request #1798 from ranaroussi/fix/price-repair-div-adjust
Fix price repair div adjust
2023-12-31 21:43:44 +00:00
ValueRaider
7e6ad0834c Merge pull request #1806 from puntonim/ticker-history-exc-hook
Ticker.history() to raise HTTP request excs if raise_errors args is True
2023-12-31 14:09:12 +00:00
puntonim
c94cbb64d4 Ticker.history() to raise HTTP request excs if raise_errors args is True 2023-12-31 14:57:47 +01:00
ValueRaider
c053e2cb30 Merge pull request #1807 from ranaroussi/feature/optional-reqs-min-versions
Set sensible min versions for optional 'nospam' reqs
2023-12-31 13:47:17 +00:00
Value Raider
112b297c41 Set sensible min versions for optional 'nospam' reqs
Set sensible min versions for optional 'nospam' reqs:
- requests_cache >= 1.0 , first defined DO_NOT_CACHE
2023-12-31 13:45:26 +00:00
ValueRaider
5195c3a798 Merge pull request #1810 from Tejasweee/dev
make nan as float
2023-12-31 12:56:26 +00:00
Tejasweee
c1ad2589da make nan as float 2023-12-31 09:29:19 +05:45
ValueRaider
d1a34a4da0 Merge pull request #1796 from ranaroussi/fix/cookie-cache-date
Fix invalid date entering cache DB
2023-12-30 17:32:14 +00:00
Value Raider
d44eff4065 Fix 'Unalignable' error in reconstruct_intervals 2023-12-22 20:29:04 +00:00
Value Raider
db670aefd7 Fix invalid date entering cache DB
'peewee.DateTimeField' is not ISO-compliant. If user enforces strict ISO-compliance,
then translation between DateTimeField and sqlite breaks. Fix is to manually
implement translation.
2023-12-22 12:59:50 +00:00
Manlai Amar
a3095d2a40 explicitly name the column levels 2023-12-21 00:02:53 -08:00
ValueRaider
f753e6090d Merge pull request #1793 from ranaroussi/fix/fetch-tkr-tz
Fix _get_ticker_tz() args, were being swapped. Improve its unit test
2023-12-17 18:59:06 +00:00
Value Raider
9021fe52b4 Fix _get_ticker_tz() args, were being swapped. Improve its unit test 2023-12-17 18:35:29 +00:00
ValueRaider
281cc64a4a Merge pull request #1790 from bot-unit/feature/calendar
feature calendar events
2023-12-16 13:37:19 +00:00
ValueRaider
8975689bd1 README: add cache folder location 2023-12-16 13:36:40 +00:00
Unit
24f53e935d added calendar events
added events from calendarEvents module
returning data is dict
test upgraded and passed
2023-12-16 13:35:04 +01:00
ValueRaider
a6790606ef Merge pull request #1774 from coskos-ops/fix/complementaryinfo
Fixed incorrect code for ticker complementary info retrieval
2023-12-14 17:57:26 +00:00
Filip Kostic
122269cf53 Fixed fstring error 2023-12-13 19:45:47 -05:00
ValueRaider
a914647fa4 Merge pull request #1772 from JuliaLWang8/feat/holders-insiders
Feat/Holders insider data
2023-12-13 22:13:24 +00:00
Julia L. Wang
dc957eeb0e Implementation of holders data 2023-12-13 16:57:13 -05:00
ValueRaider
f8d65d0def Merge pull request #1773 from bot-unit/feature/upgrades_downgrades
add upgrades downgrades
2023-12-13 20:59:58 +00:00
ValueRaider
f32097e157 Merge pull request #1771 from JuliaLWang8/feat/extra-dependencies
Feat/adding extra dependencies
2023-12-13 20:58:48 +00:00
Value Raider
469037be80 Tweaks to formatting and links. 2023-12-13 19:26:05 +00:00
Julia L. Wang
9648e69b7e Updated scipy and readme 2023-12-12 18:26:04 -05:00
ValueRaider
f718db6c2f Merge pull request #1776 from coskos-ops/fix/progressSTDerr 2023-12-12 23:08:28 +00:00
Filip Kostic
c8280e4001 Update utils.py 2023-12-12 17:45:26 -05:00
ValueRaider
53c29480b6 Merge pull request #1779 from VishnuAkundi/invalid_symbol_date_fix
Fix for Key Error Issue on Date column when one of the symbols is no longer valid (delisted/not available)
2023-12-12 21:16:39 +00:00
Vishnu Akundi
4a5616d5c4 Added Fix and Unit Test for Issue 2023-12-12 11:29:58 -05:00
Filip Kostic
5e0006e4b3 Removed redundant import 2023-12-11 15:07:16 -05:00
Filip Kostic
2b1a26ef0c Moved progress bar output to stderr 2023-12-10 20:51:11 -05:00
Filip Kostic
8fdf53233f Fixed issue #1305. Added test case to test for trailingPegInfo statistic retrieval 2023-12-10 17:54:08 -05:00
Unit
4175885747 add upgrades downgrades
add upgrades/downgrades (recommendations history)
return data is pandas dataframe
add test for upgrades/downgrades data
2023-12-10 22:35:53 +01:00
ValueRaider
580502941a Merge pull request #1766 from JuliaLWang8/pandas-future-proofing
Pandas future proofing
2023-12-10 20:33:46 +00:00
Julia L. Wang
1863b211cd Added extra dependencies 2023-12-10 10:36:02 -05:00
Julia L. Wang
0bcd2dc725 Removed unnecessary iloc 2023-12-09 23:08:22 -05:00
ValueRaider
c60e590bd7 Merge pull request #1768 from ranaroussi/fix/price-repair-and-tests
Minor fixes for price repair and related tests
2023-12-09 21:45:25 +00:00
ValueRaider
fce4707340 Merge pull request #1756 from marco-carvalho/ruff
Add Ruff
2023-12-09 21:33:28 +00:00
Value Raider
f7825c1c3a Minor fixes for price repair and related tests
Minor fixes for price repair and related tests:
- update out-of-date test, remove delisted ticker
- fix Numpy type mismatch error
2023-12-09 19:40:20 +00:00
Marco Carvalho
27ef2bcd1a Update ruff.yml 2023-12-09 13:18:35 +00:00
Marco Carvalho
fb2006b814 add ruff 2023-12-09 13:18:35 +00:00
Julia L. Wang
9b9158050a Pandas future proofing (tested)
Changed fillna, iloc, and added test changes
2023-12-08 04:26:04 -05:00
ValueRaider
f30e4ebd4c Merge pull request #1764 from ranaroussi/main
sync main -> dev
2023-12-07 09:40:56 +00:00
Value Raider
f08fe83290 Version 0.2.33 2023-12-06 19:49:23 +00:00
ValueRaider
ca2040f5fd Merge pull request #1759 from ranaroussi/hotfix/cookie-fallback-strategy
Fix '_set_cookie_strategy'
2023-12-06 19:47:40 +00:00
ValueRaider
1cfeddff59 Merge pull request #1760 from bot-unit/bugfixes/from_isin
fix base class init method
2023-12-06 19:47:32 +00:00
Value Raider
1ab476b14f Fix '_set_cookie_strategy', was double-toggling. + more logging 2023-12-06 19:46:01 +00:00
Unit
ae2ae7bce4 fix class init method 2023-12-05 19:19:03 +00:00
ValueRaider
1d3ef4f733 Merge pull request #1754 from bot-unit/feature/recommendations
Feature/recommendations
2023-12-02 19:43:35 +00:00
Unit
a3ac9fc72d added recommendations
added valid modules for quote summary request
added _fetch method for fetching quote summary
added fetch recommendationTrend
2023-12-02 15:46:17 +01:00
ValueRaider
03a1f03583 Create CODE_OF_CONDUCT.md 2023-11-26 13:08:54 +00:00
ValueRaider
af9a356fd5 Merge pull request #1745 from ranaroussi/main
sync main -> dev
2023-11-19 11:10:03 +00:00
Value Raider
9b6e35bdcd Version 0.2.32 2023-11-18 12:56:06 +00:00
ValueRaider
4d4e56cdc8 Merge pull request #1657 from ranaroussi/feature/cookie-and-crumb
Add cookie & crumb to requests
2023-11-18 12:54:24 +00:00
Value Raider
91efcd8f7d Final tidy before merge 2023-11-18 12:53:42 +00:00
Value Raider
63a3531edc Remove dependence on python>3.8 2023-11-16 20:29:02 +00:00
Value Raider
1b0d8357d6 Beta version 0.2.32b1 2023-11-13 20:29:51 +00:00
Value Raider
4466e57b95 Add cookie & crumb to requests
Add cookie & crumb to requests. Involves several changes:
- fetch cookie & crumb, obviously.
- two different cookie strategies - one seems to work better in USA, other better outside.
- yfinance auto-detects if one strategy fails, and switches to other strategy.
- cookie is stored in persistent cache folder, alongside timezones. Refetched after 24 hours.

To have this work well with multithreading (yfinance.download()) requires more changes:
- all threads share the same cookie, therefore the same session object. Requires thread-safety ...
- converted data class to a singleton with "SingletonMeta":
 - the first init() call initialises data.
 - but successive calls update its session object - naughty but necessary.
- thread locks to avoid deadlocks and race conditions.
2023-11-13 19:35:12 +00:00
ValueRaider
6d3d6b659c Merge pull request #1740 from mikez/main
Fix pandas FutureWarning: "Passing literal html to 'read_html' is deprecated"
2023-11-09 20:20:56 +00:00
Value Raider
b696add360 Restore 'earnings_dates' unit tests 2023-11-09 20:18:32 +00:00
Michael B.
06751a0b9c Fix pandas FutureWarning: "Passing literal html to 'read_html' is deprecated"
This addresses #1685 (`institutional_holders`) and also `get_earnings_dates()`.

Pandas issue is found here:
https://github.com/pandas-dev/pandas/issues/53767
and the change in code here:
5cedf87ccc/pandas/io/html.py (L1238)

As for legacy Python 2.7 support: `io.StringIO` seems to be supported in
the versions I tested. See https://docs.python.org/2/library/io.html
2023-11-09 17:42:22 +01:00
Mike Reiche
ba3c1b5ac6 Merge remote-tracking branch 'yfinance/dev' into bugfix/data-types-2
# Conflicts:
#	yfinance/base.py
2023-10-21 12:44:39 +02:00
ValueRaider
7432d2939c Merge pull request #1711 from rickturner2001/refactor/ticker-proxy
Refactor/ticker proxy
2023-10-18 20:08:05 +01:00
Mike Reiche
ba977a16a2 Added tests 2023-10-12 08:53:16 +02:00
Mike Reiche
9a3d60105c Minor typing fixes 2023-10-12 08:53:16 +02:00
Mike Reiche
0521428f69 Fixed typing bug when series are empty 2023-10-12 08:53:15 +02:00
Value Raider
308e58b914 Bump version to 0.2.31 2023-10-04 22:03:24 +01:00
ValueRaider
f6beadf448 Merge pull request #1716 from ranaroussi/dev
sync dev -> main
2023-10-04 22:01:43 +01:00
rickturner2001
d607c43967 refactored Ticker proxy attribute 2023-10-01 21:46:59 -04:00
rickturner2001
4c1669ad9d Refactored tests for Ticker with proxy
Ticker proxy refactor
2023-10-01 21:08:49 -04:00
Value Raider
7da64b679e Dev version 0.2.31b2 2023-10-01 22:40:15 +01:00
ValueRaider
38f8ccd40a Merge pull request #1709 from ranaroussi/feature/tz-cache-lazy-load
Feature/tz cache lazy load
2023-10-01 22:39:19 +01:00
ValueRaider
13acc3dc97 Merge pull request #1707 from rickturner2001/fix-testing
Test Fix: Check for type and expect exceptions in tests
2023-10-01 21:19:31 +01:00
Value Raider
cc1ac7bbcc Fix cache on read-only filesystem, + tests 2023-10-01 21:17:59 +01:00
rickturner2001
75449fd0ac Merge branch 'dev' into fix-testing 2023-10-01 15:08:12 -04:00
Value Raider
22e0c414c4 Rename for clarity 2023-10-01 13:04:48 +01:00
Value Raider
37d60e6efb Complete TZ cache lazy-loading
The initial singleton design pattern for database access meant that lazy-loading was broken,
due to structure of '_KV' class. So errors were blocking import.
Fix = use 'peewee' proxy database and initialise when needed.
2023-10-01 12:53:49 +01:00
Value Raider
dac9a48742 Dev version 0.2.31b1 2023-10-01 10:06:34 +01:00
rickturner2001
bd52326091 Fix testing: Fixed broken tests and refactored code 2023-09-30 18:23:03 -04:00
ValueRaider
9581b8bd45 Merge pull request #1705 from ranaroussi/fix/tz-cache-init
Fix TZ cache exception blocking import
2023-09-30 21:16:04 +01:00
Value Raider
62b2c25da8 Disable broken Ticker tests 2023-09-27 21:45:46 +01:00
Value Raider
7618dda5d0 Hopefully fix TZ cache exception blocking import
Hopefully fix TZ cache exception blocking import. Also:
- relocate cache init lock
- add test for setTzCacheLocation()
2023-09-27 21:34:59 +01:00
ValueRaider
95ef486e13 Merge pull request #1704 from ranaroussi/main
sync main -> dev
2023-09-27 20:53:42 +01:00
ValueRaider
9e59f6b61c Merge pull request #1703 from ranaroussi/fix/prices-intraday-merge-events
Fix merging pre-market events with intraday prices
2023-09-27 20:52:23 +01:00
Value Raider
716cd65fd3 Fix merging pre-market events with intraday prices 2023-09-25 22:19:24 +01:00
ValueRaider
127b53ee7f Merge pull request #1693 from ranaroussi/main
sync main -> dev
2023-09-22 14:52:36 +01:00
29 changed files with 3514 additions and 2582 deletions

View File

@@ -6,23 +6,13 @@ body:
- type: markdown
attributes:
value: |
# IMPORTANT - Read and follow these instructions carefully. Help us help you.
### Does issue already exist?
Use the search tool. Don't annoy everyone by duplicating existing Issues.
# !!! IMPORTANT !!! FOLLOW THESE INSTRUCTIONS CAREFULLY !!!
### Are you up-to-date?
Upgrade to the latest version and confirm the issue/bug is still there.
Upgrade to the latest version: `$ pip install yfinance --upgrade --no-cache-dir`
`$ pip install yfinance --upgrade --no-cache-dir`
Confirm by running:
`import yfinance as yf ; print(yf.__version__)`
and comparing against [PIP](https://pypi.org/project/yfinance/#history).
Confirm latest version by running: `import yfinance as yf ; print(yf.__version__)` and comparing against [PyPI](https://pypi.org/project/yfinance/#history).
### Does Yahoo actually have the data?
@@ -34,6 +24,10 @@ body:
Yahoo Finance free service has rate-limiting https://github.com/ranaroussi/yfinance/discussions/1513. Once limit hit, Yahoo can delay, block, or return bad data -> not a `yfinance` bug.
### Does issue already exist?
Use the search tool. Don't duplicate existing issues.
- type: markdown
attributes:
value: |
@@ -61,7 +55,7 @@ body:
id: debug-log
attributes:
label: "Debug log"
description: "Run code with debug logging enabled and post the full output. Instructions: https://github.com/ranaroussi/yfinance/tree/main#logging"
description: "Run code with debug logging enabled and post the full output. IMPORTANT INSTRUCTIONS: https://github.com/ranaroussi/yfinance/tree/main#logging"
validations:
required: true

View File

@@ -8,11 +8,11 @@ jobs:
deploy:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: 3.x
- run: pip install -r requirements.txt
- run: pip install mkdocstrings==0.14.0
- run: pip install mkdocs-material
- run: mkdocs gh-deploy --force
- run: mkdocs gh-deploy --force

13
.github/workflows/ruff.yml vendored Normal file
View File

@@ -0,0 +1,13 @@
name: Ruff
on:
pull_request:
branches:
- master
- main
- dev
jobs:
ruff:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: chartboost/ruff-action@v1

View File

@@ -1,6 +1,70 @@
Change Log
===========
0.2.38
------
Fix holders & insiders #1908
0.2.37
------
Small fixes:
- Fix Pandas warnings #1838 #1844
- Fix price repair bug, typos, refactor #1866 #1865 #1849
- Stop disabling logging #1841
0.2.36
------
Small fixes:
- Update README.md for better copy-ability #1823
- Name download() column levels #1795
- Fix history(keepna=False) when repair=True #1824
- Replace empty list with empty pd.Series #1724
- Handle peewee with old sqlite #1827
- Fix JSON error handling #1830 #1833
0.2.35
------
Internal fixes for 0.2.34
0.2.34
------
Features:
- Add Recommendations Trend Summary #1754
- Add Recommendation upgrades & downgrades #1773
- Add Insider Roster & Transactions #1772
- Moved download() progress bar to STDERR #1776
- PIP optional dependencies #1771
- Set sensible min versions for optional 'nospam' reqs #1807
Fixes
- Fix download() DatetimeIndex on invalid symbols #1779
- Fix invalid date entering cache DB #1796
- Fix Ticker.calendar fetch #1790
- Fixed adding complementary to info #1774
- Ticker.earnings_dates: fix warning "Value 'NaN' has dtype incompatible with float64" #1810
- Minor fixes for price repair and related tests #1768
- Fix price repair div adjust #1798
- Fix 'raise_errors' argument ignored in Ticker.history() #1806
Maintenance
- Fix regression: _get_ticker_tz() args were being swapped. Improve its unit test #1793
- Refactor Ticker proxy #1711
- Add Ruff linter checks #1756
- Resolve Pandas FutureWarnings #1766
0.2.33
------
Cookie fixes:
- fix backup strategy #1759
- fix Ticker(ISIN) #1760
0.2.32
------
Add cookie & crumb to requests #1657
0.2.31
------
- Fix TZ cache exception blocking import #1705 #1709
- Fix merging pre-market events with intraday prices #1703
0.2.30
------
- Fix OperationalError #1698

15
CODE_OF_CONDUCT.md Normal file
View File

@@ -0,0 +1,15 @@
# Code of Conduct
## Submitting a new issue
* Search through existing Issues and Discussions, in case your issue already exists and a solution is being developed.
* Ensure you read & follow the template form.
* Consider you may be the best person to investigate and fix.
## Contributing to an existing Issue
* Read the entire thread.
* Ensure your comment is contributing something new/useful. Remember you can simply react to other comments.
* Be concise:
- use the formatting options
- if replying to a big comment, instead of quoting it, link to it

View File

@@ -42,6 +42,26 @@ Yahoo! finance API is intended for personal use only.**
---
## Installation
Install `yfinance` using `pip`:
``` {.sourceCode .bash}
$ pip install yfinance --upgrade --no-cache-dir
```
[With Conda](https://anaconda.org/ranaroussi/yfinance).
To install with optional dependencies, replace `optional` with: `nospam` for [caching-requests](#smarter-scraping), `repair` for [price repair](https://github.com/ranaroussi/yfinance/wiki/Price-repair), or `nospam,repair` for both:
``` {.sourceCode .bash}
$ pip install "yfinance[optional]"
```
[Required dependencies](./requirements.txt) , [all dependencies](./setup.py#L62).
---
## Quick Start
### The Ticker module
@@ -87,6 +107,14 @@ msft.quarterly_cashflow
msft.major_holders
msft.institutional_holders
msft.mutualfund_holders
msft.insider_transactions
msft.insider_purchases
msft.insider_roster_holders
# show recommendations
msft.recommendations
msft.recommendations_summary
msft.upgrades_downgrades
# Show future and historic earnings dates, returns at most next 4 quarters and last 8 quarters by default.
# Note: If more are needed use msft.get_earnings_dates(limit=XX) with increased limit argument.
@@ -155,9 +183,10 @@ data = yf.download("SPY AAPL", period="1mo")
### Smarter scraping
To use a custom `requests` session (for example to cache calls to the
API or customize the `User-agent` header), pass a `session=` argument to
the Ticker constructor.
Install the `nospam` packages for smarter scraping using `pip` (see [Installation](#installation)). These packages help cache calls such that Yahoo is not spammed with requests.
To use a custom `requests` session, pass a `session=` argument to
the Ticker constructor. This allows for caching calls to the API as well as a custom way to modify requests via the `User-agent` header.
```python
import requests_cache
@@ -168,7 +197,7 @@ ticker = yf.Ticker('msft', session=session)
ticker.actions
```
Combine a `requests_cache` with rate-limiting to avoid triggering Yahoo's rate-limiter/blocker that can corrupt data.
Combine `requests_cache` with rate-limiting to avoid triggering Yahoo's rate-limiter/blocker that can corrupt data.
```python
from requests import Session
from requests_cache import CacheMixin, SQLiteCache
@@ -216,11 +245,13 @@ yf.pdr_override() # <== that's all it takes :-)
data = pdr.get_data_yahoo("SPY", start="2017-01-01", end="2017-04-30")
```
### Timezone cache store
### Persistent cache store
To reduce Yahoo, yfinance store some data locally: timezones to localize dates, and cookie. Cache location is:
- Windows = C:/Users/\<USER\>/AppData/Local/py-yfinance
- Linux = /home/\<USER\>/.cache/py-yfinance
- MacOS = /Users/\<USER\>/Library/Caches/py-yfinance
When fetching price data, all dates are localized to stock exchange timezone.
But timezone retrieval is relatively slow, so yfinance attemps to cache them
in your users cache folder.
You can direct cache to use a different location with `set_tz_cache_location()`:
```python
import yfinance as yf
@@ -230,41 +261,6 @@ yf.set_tz_cache_location("custom/cache/location")
---
## Installation
Install `yfinance` using `pip`:
``` {.sourceCode .bash}
$ pip install yfinance --upgrade --no-cache-dir
```
Test new features by installing betas, provide feedback in [corresponding Discussion](https://github.com/ranaroussi/yfinance/discussions):
``` {.sourceCode .bash}
$ pip install yfinance --upgrade --no-cache-dir --pre
```
To install `yfinance` using `conda`, see
[this](https://anaconda.org/ranaroussi/yfinance).
### Requirements
- [Python](https://www.python.org) \>= 2.7, 3.4+
- [Pandas](https://github.com/pydata/pandas) \>= 1.3.0
- [Numpy](http://www.numpy.org) \>= 1.16.5
- [requests](http://docs.python-requests.org/en/master) \>= 2.31
- [lxml](https://pypi.org/project/lxml) \>= 4.9.1
- [appdirs](https://pypi.org/project/appdirs) \>= 1.4.4
- [pytz](https://pypi.org/project/pytz) \>=2022.5
- [frozendict](https://pypi.org/project/frozendict) \>= 2.3.4
- [beautifulsoup4](https://pypi.org/project/beautifulsoup4) \>= 4.11.1
- [html5lib](https://pypi.org/project/html5lib) \>= 1.1
- [peewee](https://pypi.org/project/peewee) \>= 3.16.2
#### Optional (if you want to use `pandas_datareader`)
- [pandas\_datareader](https://github.com/pydata/pandas-datareader)
\>= 0.4.0
## Developers: want to contribute?
`yfinance` relies on community to investigate bugs and contribute code. Developer guide: https://github.com/ranaroussi/yfinance/discussions/1084

View File

@@ -1,5 +1,5 @@
{% set name = "yfinance" %}
{% set version = "0.2.30" %}
{% set version = "0.2.38" %}
package:
name: "{{ name|lower }}"

View File

@@ -8,4 +8,4 @@ pytz>=2022.5
frozendict>=2.3.4
beautifulsoup4>=4.11.1
html5lib>=1.1
peewee>=3.16.2
peewee>=3.16.2

View File

@@ -64,6 +64,10 @@ setup(
'lxml>=4.9.1', 'appdirs>=1.4.4', 'pytz>=2022.5',
'frozendict>=2.3.4', 'peewee>=3.16.2',
'beautifulsoup4>=4.11.1', 'html5lib>=1.1'],
extras_require={
'nospam': ['requests_cache>=1.0', 'requests_ratelimiter>=0.3.1'],
'repair': ['scipy>=1.6.3'],
},
# Note: Pandas.read_html() needs html5lib & beautifulsoup4
entry_points={
'console_scripts': [

View File

@@ -1,73 +0,0 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
#
# yfinance - market data downloader
# https://github.com/ranaroussi/yfinance
"""
Sanity check for most common library uses all working
- Stock: Microsoft
- ETF: Russell 2000 Growth
- Mutual fund: Vanguard 500 Index fund
- Index: S&P500
- Currency BTC-USD
"""
import yfinance as yf
import unittest
import logging
logging.basicConfig(level=logging.DEBUG)
symbols = ['MSFT', 'IWO', 'VFINX', '^GSPC', 'BTC-USD']
tickers = [yf.Ticker(symbol) for symbol in symbols]
class TestTicker(unittest.TestCase):
def test_info_history(self):
for ticker in tickers:
# always should have info and history for valid symbols
assert(ticker.info is not None and ticker.info != {})
history = ticker.history(period="max")
assert(history.empty is False and history is not None)
def test_attributes(self):
for ticker in tickers:
ticker.isin
ticker.major_holders
ticker.institutional_holders
ticker.mutualfund_holders
ticker.dividends
ticker.splits
ticker.actions
ticker.shares
ticker.info
ticker.calendar
ticker.recommendations
ticker.earnings
ticker.quarterly_earnings
ticker.income_stmt
ticker.quarterly_income_stmt
ticker.balance_sheet
ticker.quarterly_balance_sheet
ticker.cashflow
ticker.quarterly_cashflow
ticker.recommendations_summary
ticker.analyst_price_target
ticker.revenue_forecasts
ticker.sustainability
ticker.options
ticker.news
ticker.earnings_trend
ticker.earnings_dates
ticker.earnings_forecasts
def test_holders(self):
for ticker in tickers:
assert(ticker.info is not None and ticker.info != {})
assert(ticker.major_holders is not None)
assert(ticker.institutional_holders is not None)
if __name__ == '__main__':
unittest.main()

View File

@@ -4,18 +4,20 @@ import appdirs as _ad
import datetime as _dt
import sys
import os
import yfinance
from requests import Session
from requests_cache import CacheMixin, SQLiteCache
from requests_ratelimiter import LimiterMixin, MemoryQueueBucket
from pyrate_limiter import Duration, RequestRate, Limiter
_parent_dp = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
_src_dp = _parent_dp
sys.path.insert(0, _src_dp)
import yfinance
# Optional: see the exact requests that are made during tests:
# import logging
# logging.basicConfig(level=logging.DEBUG)
# Use adjacent cache folder for testing, delete if already exists and older than today
testing_cache_dirpath = os.path.join(_ad.user_cache_dir(), "py-yfinance-testing")
yfinance.set_tz_cache_location(testing_cache_dirpath)
@@ -27,12 +29,8 @@ if os.path.isdir(testing_cache_dirpath):
# Setup a session to rate-limit and cache persistently:
from requests import Session
from requests_cache import CacheMixin, SQLiteCache
from requests_ratelimiter import LimiterMixin, MemoryQueueBucket
class CachedLimiterSession(CacheMixin, LimiterMixin, Session):
pass
from pyrate_limiter import Duration, RequestRate, Limiter
history_rate = RequestRate(1, Duration.SECOND*2)
limiter = Limiter(history_rate)
cache_fp = os.path.join(testing_cache_dirpath, "unittests-cache")

View File

@@ -2,10 +2,10 @@ Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits
2023-05-18 00:00:00+01:00,193.220001220703,200.839996337891,193.220001220703,196.839996337891,196.839996337891,653125,0,0
2023-05-17 00:00:00+01:00,199.740005493164,207.738006591797,190.121994018555,197.860000610352,197.860000610352,822268,0,0
2023-05-16 00:00:00+01:00,215.600006103516,215.600006103516,201.149993896484,205.100006103516,205.100006103516,451009,243.93939,0.471428571428571
2023-05-15 00:00:00+01:00,215.399955531529,219.19995640346,210.599967302595,217.399987792969,102.39998147147,1761679.3939394,0,0
2023-05-12 00:00:00+01:00,214.599988664899,216.199965558733,209.599965558733,211.399977329799,99.573855808803,1522298.48484849,0,0
2023-05-11 00:00:00+01:00,219.999966430664,219.999966430664,212.199987357003,215.000000871931,101.269541277204,3568042.12121213,0,0
2023-05-10 00:00:00+01:00,218.199954659598,223.000000435965,212.59995640346,215.399955531529,101.457929992676,5599908.78787879,0,0
2023-05-09 00:00:00+01:00,224,227.688003540039,218.199996948242,218.399993896484,102.87100982666,1906090,0,0
2023-05-05 00:00:00+01:00,220.999968174526,225.19996686663,220.799976457868,224.4,105.697140066964,964523.636363637,0,0
2023-05-04 00:00:00+01:00,216.999989972796,222.799965558733,216.881988961356,221.399965994698,104.284055655343,880983.93939394,0,0
2023-05-15 00:00:00+01:00,456.9090,464.9696,446.7272,461.1515,217.2121,830506.0000,0,0
2023-05-12 00:00:00+01:00,455.2121,458.6060,444.6060,448.4242,211.2173,717655.0000,0,0
2023-05-11 00:00:00+01:00,466.6666,466.6666,450.1212,456.0606,214.8142,1682077.0000,0,0
2023-05-10 00:00:00+01:00,462.8484,473.0303,450.9696,456.9090,215.2138,2639957.0000,0,0
2023-05-09 00:00:00+01:00,475.1515,482.9746,462.8485,463.2727,218.2112,898585.2857,0,0
2023-05-05 00:00:00+01:00,468.7878,477.6969,468.3636,476.0000,224.2061,454704.0000,0,0
2023-05-04 00:00:00+01:00,460.3030,472.6060,460.0527,469.6363,221.2086,415321.0000,0,0
1 Date Open High Low Close Adj Close Volume Dividends Stock Splits
2 2023-05-18 00:00:00+01:00 193.220001220703 200.839996337891 193.220001220703 196.839996337891 196.839996337891 653125 0 0
3 2023-05-17 00:00:00+01:00 199.740005493164 207.738006591797 190.121994018555 197.860000610352 197.860000610352 822268 0 0
4 2023-05-16 00:00:00+01:00 215.600006103516 215.600006103516 201.149993896484 205.100006103516 205.100006103516 451009 243.93939 0.471428571428571
5 2023-05-15 00:00:00+01:00 215.399955531529 456.9090 219.19995640346 464.9696 210.599967302595 446.7272 217.399987792969 461.1515 102.39998147147 217.2121 1761679.3939394 830506.0000 0 0
6 2023-05-12 00:00:00+01:00 214.599988664899 455.2121 216.199965558733 458.6060 209.599965558733 444.6060 211.399977329799 448.4242 99.573855808803 211.2173 1522298.48484849 717655.0000 0 0
7 2023-05-11 00:00:00+01:00 219.999966430664 466.6666 219.999966430664 466.6666 212.199987357003 450.1212 215.000000871931 456.0606 101.269541277204 214.8142 3568042.12121213 1682077.0000 0 0
8 2023-05-10 00:00:00+01:00 218.199954659598 462.8484 223.000000435965 473.0303 212.59995640346 450.9696 215.399955531529 456.9090 101.457929992676 215.2138 5599908.78787879 2639957.0000 0 0
9 2023-05-09 00:00:00+01:00 224 475.1515 227.688003540039 482.9746 218.199996948242 462.8485 218.399993896484 463.2727 102.87100982666 218.2112 1906090 898585.2857 0 0
10 2023-05-05 00:00:00+01:00 220.999968174526 468.7878 225.19996686663 477.6969 220.799976457868 468.3636 224.4 476.0000 105.697140066964 224.2061 964523.636363637 454704.0000 0 0
11 2023-05-04 00:00:00+01:00 216.999989972796 460.3030 222.799965558733 472.6060 216.881988961356 460.0527 221.399965994698 469.6363 104.284055655343 221.2086 880983.93939394 415321.0000 0 0

View File

@@ -43,6 +43,18 @@ class TestPriceHistory(unittest.TestCase):
df_tkrs = df.columns.levels[1]
self.assertEqual(sorted(tkrs), sorted(df_tkrs))
def test_download_with_invalid_ticker(self):
#Checks if using an invalid symbol gives the same output as not using an invalid symbol in combination with a valid symbol (AAPL)
#Checks to make sure that invalid symbol handling for the date column is the same as the base case (no invalid symbols)
invalid_tkrs = ["AAPL", "ATVI"] #AAPL exists and ATVI does not exist
valid_tkrs = ["AAPL", "INTC"] #AAPL and INTC both exist
data_invalid_sym = yf.download(invalid_tkrs, start='2023-11-16', end='2023-11-17')
data_valid_sym = yf.download(valid_tkrs, start='2023-11-16', end='2023-11-17')
self.assertEqual(data_invalid_sym['Close']['AAPL']['2023-11-16'],data_valid_sym['Close']['AAPL']['2023-11-16'])
def test_duplicatingHourly(self):
tkrs = ["IMP.JO", "BHG.JO", "SSW.JO", "BP.L", "INTC"]
@@ -132,7 +144,6 @@ class TestPriceHistory(unittest.TestCase):
def test_pricesEventsMerge_bug(self):
# Reproduce exception when merging intraday prices with future dividend
tkr = 'S32.AX'
interval = '30m'
df_index = []
d = 13
@@ -148,7 +159,7 @@ class TestPriceHistory(unittest.TestCase):
future_div_dt = _dt.datetime(2023, 9, 14, 10)
divs = _pd.DataFrame(data={"Dividends":[div]}, index=[future_div_dt])
df2 = yf.utils.safe_merge_dfs(df, divs, interval)
yf.utils.safe_merge_dfs(df, divs, interval)
# No exception = test pass
def test_intraDayWithEvents(self):
@@ -223,8 +234,10 @@ class TestPriceHistory(unittest.TestCase):
self.assertTrue((df_divs.index.date == dates).all())
except AssertionError:
print(f'- ticker = {tkr}')
print('- response:') ; print(df_divs.index.date)
print('- answer:') ; print(dates)
print('- response:')
print(df_divs.index.date)
print('- answer:')
print(dates)
raise
def test_dailyWithEvents_bugs(self):
@@ -264,66 +277,12 @@ class TestPriceHistory(unittest.TestCase):
# Reproduce issue #1634 - 1d dividend out-of-range, should be prepended to prices
div_dt = _pd.Timestamp(2022, 7, 21).tz_localize("America/New_York")
df_dividends = _pd.DataFrame(data={"Dividends":[1.0]}, index=[div_dt])
df_prices = _pd.DataFrame(data={c:[1.0] for c in yf.const.price_colnames}|{'Volume':0}, index=[div_dt+_dt.timedelta(days=1)])
df_prices = _pd.DataFrame(data={c:[1.0] for c in yf.const._PRICE_COLNAMES_}|{'Volume':0}, index=[div_dt+_dt.timedelta(days=1)])
df_merged = yf.utils.safe_merge_dfs(df_prices, df_dividends, '1d')
self.assertEqual(df_merged.shape[0], 2)
self.assertTrue(df_merged[df_prices.columns].iloc[1:].equals(df_prices))
self.assertEqual(df_merged.index[0], div_dt)
def test_intraDayWithEvents(self):
tkrs = ["BHP.AX", "IMP.JO", "BP.L", "PNL.L", "INTC"]
test_run = False
for tkr in tkrs:
start_d = _dt.date.today() - _dt.timedelta(days=59)
end_d = None
df_daily = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1d", actions=True)
df_daily_divs = df_daily["Dividends"][df_daily["Dividends"] != 0]
if df_daily_divs.shape[0] == 0:
continue
last_div_date = df_daily_divs.index[-1]
start_d = last_div_date.date()
end_d = last_div_date.date() + _dt.timedelta(days=1)
df_intraday = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="15m", actions=True)
self.assertTrue((df_intraday["Dividends"] != 0.0).any())
df_intraday_divs = df_intraday["Dividends"][df_intraday["Dividends"] != 0]
df_intraday_divs.index = df_intraday_divs.index.floor('D')
self.assertTrue(df_daily_divs.equals(df_intraday_divs))
test_run = True
if not test_run:
self.skipTest("Skipping test_intraDayWithEvents() because no tickers had a dividend in last 60 days")
def test_intraDayWithEvents_tase(self):
# TASE dividend release pre-market, doesn't merge nicely with intra-day data so check still present
tase_tkrs = ["ICL.TA", "ESLT.TA", "ONE.TA", "MGDL.TA"]
test_run = False
for tkr in tase_tkrs:
start_d = _dt.date.today() - _dt.timedelta(days=59)
end_d = None
df_daily = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1d", actions=True)
df_daily_divs = df_daily["Dividends"][df_daily["Dividends"] != 0]
if df_daily_divs.shape[0] == 0:
continue
last_div_date = df_daily_divs.index[-1]
start_d = last_div_date.date()
end_d = last_div_date.date() + _dt.timedelta(days=1)
df_intraday = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="15m", actions=True)
self.assertTrue((df_intraday["Dividends"] != 0.0).any())
df_intraday_divs = df_intraday["Dividends"][df_intraday["Dividends"] != 0]
df_intraday_divs.index = df_intraday_divs.index.floor('D')
self.assertTrue(df_daily_divs.equals(df_intraday_divs))
test_run = True
if not test_run:
self.skipTest("Skipping test_intraDayWithEvents_tase() because no tickers had a dividend in last 60 days")
def test_weeklyWithEvents(self):
# Reproduce issue #521
tkr1 = "QQQ"
@@ -415,9 +374,9 @@ class TestPriceHistory(unittest.TestCase):
raise Exception("Ambiguous DST issue not resolved")
def test_dst_fix(self):
# Daily intervals should start at time 00:00. But for some combinations of date and timezone,
# Daily intervals should start at time 00:00. But for some combinations of date and timezone,
# Yahoo has time off by few hours (e.g. Brazil 23:00 around Jan-2022). Suspect DST problem.
# The clue is (a) minutes=0 and (b) hour near 0.
# The clue is (a) minutes=0 and (b) hour near 0.
# Obviously Yahoo meant 00:00, so ensure this doesn't affect date conversion.
# The correction is successful if no days are weekend, and weekly data begins Monday
@@ -440,24 +399,20 @@ class TestPriceHistory(unittest.TestCase):
raise
def test_prune_post_intraday_us(self):
# Half-day before USA Thanksgiving. Yahoo normally
# returns an interval starting when regular trading closes,
# Half-day at USA Thanksgiving. Yahoo normally
# returns an interval starting when regular trading closes,
# even if prepost=False.
# Setup
tkr = "AMZN"
interval = "1h"
interval_td = _dt.timedelta(hours=1)
time_open = _dt.time(9, 30)
time_close = _dt.time(16)
special_day = _dt.date(2022, 11, 25)
special_day = _dt.date(2023, 11, 24)
time_early_close = _dt.time(13)
dat = yf.Ticker(tkr, session=self.session)
# Run
start_d = special_day - _dt.timedelta(days=7)
end_d = special_day + _dt.timedelta(days=7)
df = dat.history(start=start_d, end=end_d, interval=interval, prepost=False, keepna=True)
df = dat.history(start=start_d, end=end_d, interval="1h", prepost=False, keepna=True)
tg_last_dt = df.loc[str(special_day)].index[-1]
self.assertTrue(tg_last_dt.time() < time_early_close)
@@ -466,88 +421,22 @@ class TestPriceHistory(unittest.TestCase):
end_d = _dt.date(special_day.year+1, 1, 1)
df = dat.history(start=start_d, end=end_d, interval="1h", prepost=False, keepna=True)
last_dts = _pd.Series(df.index).groupby(df.index.date).last()
f_early_close = (last_dts+interval_td).dt.time < time_close
early_close_dates = last_dts.index[f_early_close].values
self.assertEqual(len(early_close_dates), 1)
self.assertEqual(early_close_dates[0], special_day)
first_dts = _pd.Series(df.index).groupby(df.index.date).first()
f_late_open = first_dts.dt.time > time_open
late_open_dates = first_dts.index[f_late_open]
self.assertEqual(len(late_open_dates), 0)
def test_prune_post_intraday_omx(self):
# Half-day before Sweden Christmas. Yahoo normally
# returns an interval starting when regular trading closes,
# even if prepost=False.
# If prepost=False, test that yfinance is removing prepost intervals.
# Setup
tkr = "AEC.ST"
interval = "1h"
interval_td = _dt.timedelta(hours=1)
time_open = _dt.time(9)
time_close = _dt.time(17, 30)
special_day = _dt.date(2022, 12, 23)
time_early_close = _dt.time(13, 2)
dat = yf.Ticker(tkr, session=self.session)
# Half trading day Jan 5, Apr 14, May 25, Jun 23, Nov 4, Dec 23, Dec 30
half_days = [_dt.date(special_day.year, x[0], x[1]) for x in [(1, 5), (4, 14), (5, 25), (6, 23), (11, 4), (12, 23), (12, 30)]]
# Yahoo has incorrectly classified afternoon of 2022-04-13 as post-market.
# Nothing yfinance can do because Yahoo doesn't return data with prepost=False.
# But need to handle in this test.
expected_incorrect_half_days = [_dt.date(2022, 4, 13)]
half_days = sorted(half_days+expected_incorrect_half_days)
# Run
start_d = special_day - _dt.timedelta(days=7)
end_d = special_day + _dt.timedelta(days=7)
df = dat.history(start=start_d, end=end_d, interval=interval, prepost=False, keepna=True)
tg_last_dt = df.loc[str(special_day)].index[-1]
self.assertTrue(tg_last_dt.time() < time_early_close)
# Test no other afternoons (or mornings) were pruned
start_d = _dt.date(special_day.year, 1, 1)
end_d = _dt.date(special_day.year+1, 1, 1)
df = dat.history(start=start_d, end=end_d, interval="1h", prepost=False, keepna=True)
last_dts = _pd.Series(df.index).groupby(df.index.date).last()
f_early_close = (last_dts+interval_td).dt.time < time_close
early_close_dates = last_dts.index[f_early_close].values
unexpected_early_close_dates = [d for d in early_close_dates if d not in half_days]
self.assertEqual(len(unexpected_early_close_dates), 0)
self.assertEqual(len(early_close_dates), len(half_days))
self.assertTrue(_np.equal(early_close_dates, half_days).all())
first_dts = _pd.Series(df.index).groupby(df.index.date).first()
f_late_open = first_dts.dt.time > time_open
late_open_dates = first_dts.index[f_late_open]
self.assertEqual(len(late_open_dates), 0)
dfd = dat.history(start=start_d, end=end_d, interval='1d', prepost=False, keepna=True)
self.assertTrue(_np.equal(dfd.index.date, _pd.to_datetime(last_dts.index).date).all())
def test_prune_post_intraday_asx(self):
# Setup
tkr = "BHP.AX"
interval = "1h"
interval_td = _dt.timedelta(hours=1)
time_open = _dt.time(10)
time_close = _dt.time(16, 12)
# No early closes in 2022
# No early closes in 2023
dat = yf.Ticker(tkr, session=self.session)
# Test no afternoons (or mornings) were pruned
start_d = _dt.date(2022, 1, 1)
end_d = _dt.date(2022+1, 1, 1)
# Test no other afternoons (or mornings) were pruned
start_d = _dt.date(2023, 1, 1)
end_d = _dt.date(2023+1, 1, 1)
df = dat.history(start=start_d, end=end_d, interval="1h", prepost=False, keepna=True)
last_dts = _pd.Series(df.index).groupby(df.index.date).last()
f_early_close = (last_dts+interval_td).dt.time < time_close
early_close_dates = last_dts.index[f_early_close].values
self.assertEqual(len(early_close_dates), 0)
first_dts = _pd.Series(df.index).groupby(df.index.date).first()
f_late_open = first_dts.dt.time > time_open
late_open_dates = first_dts.index[f_late_open]
self.assertEqual(len(late_open_dates), 0)
dfd = dat.history(start=start_d, end=end_d, interval='1d', prepost=False, keepna=True)
self.assertTrue(_np.equal(dfd.index.date, _pd.to_datetime(last_dts.index).date).all())
def test_weekly_2rows_fix(self):
tkr = "AMZN"
@@ -566,7 +455,7 @@ class TestPriceHistory(unittest.TestCase):
end = "2019-12-31"
interval = "3mo"
df = dat.history(start=start, end=end, interval=interval)
dat.history(start=start, end=end, interval=interval)
class TestPriceRepair(unittest.TestCase):
@@ -581,6 +470,18 @@ class TestPriceRepair(unittest.TestCase):
if cls.session is not None:
cls.session.close()
def test_types(self):
tkr = 'INTC'
dat = yf.Ticker(tkr, session=self.session)
data = dat.history(period="3mo", interval="1d", prepost=True, repair=True)
self.assertIsInstance(data, _pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
reconstructed = dat._lazy_load_price_history()._reconstruct_intervals_batch(data, "1wk", True)
self.assertIsInstance(reconstructed, _pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
def test_reconstruct_2m(self):
# 2m repair requires 1m data.
# Yahoo restricts 1m fetches to 7 days max within last 30 days.
@@ -589,7 +490,6 @@ class TestPriceRepair(unittest.TestCase):
tkrs = ["BHP.AX", "IMP.JO", "BP.L", "PNL.L", "INTC"]
dt_now = _pd.Timestamp.utcnow()
td_7d = _dt.timedelta(days=7)
td_60d = _dt.timedelta(days=60)
# Round time for 'requests_cache' reuse
@@ -599,13 +499,14 @@ class TestPriceRepair(unittest.TestCase):
dat = yf.Ticker(tkr, session=self.session)
end_dt = dt_now
start_dt = end_dt - td_60d
df = dat.history(start=start_dt, end=end_dt, interval="2m", repair=True)
dat.history(start=start_dt, end=end_dt, interval="2m", repair=True)
def test_repair_100x_random_weekly(self):
# Setup:
tkr = "PNL.L"
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.fast_info["timezone"]
hist = dat._lazy_load_price_history()
data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
df = _pd.DataFrame(data={"Open": [470.5, 473.5, 474.5, 470],
@@ -629,7 +530,7 @@ class TestPriceRepair(unittest.TestCase):
# Run test
df_repaired = dat._fix_unit_random_mixups(df_bad, "1wk", tz_exchange, prepost=False)
df_repaired = hist._fix_unit_random_mixups(df_bad, "1wk", tz_exchange, prepost=False)
# First test - no errors left
for c in data_cols:
@@ -660,6 +561,7 @@ class TestPriceRepair(unittest.TestCase):
tkr = "PNL.L"
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.fast_info["timezone"]
hist = dat._lazy_load_price_history()
data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
df = _pd.DataFrame(data={"Open": [400, 398, 392.5, 417],
@@ -686,7 +588,7 @@ class TestPriceRepair(unittest.TestCase):
df.index = df.index.tz_localize(tz_exchange)
df_bad.index = df_bad.index.tz_localize(tz_exchange)
df_repaired = dat._fix_unit_random_mixups(df_bad, "1wk", tz_exchange, prepost=False)
df_repaired = hist._fix_unit_random_mixups(df_bad, "1wk", tz_exchange, prepost=False)
# First test - no errors left
for c in data_cols:
@@ -718,6 +620,7 @@ class TestPriceRepair(unittest.TestCase):
tkr = "PNL.L"
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.fast_info["timezone"]
hist = dat._lazy_load_price_history()
data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
df = _pd.DataFrame(data={"Open": [478, 476, 476, 472],
@@ -739,7 +642,7 @@ class TestPriceRepair(unittest.TestCase):
df.index = df.index.tz_localize(tz_exchange)
df_bad.index = df_bad.index.tz_localize(tz_exchange)
df_repaired = dat._fix_unit_random_mixups(df_bad, "1d", tz_exchange, prepost=False)
df_repaired = hist._fix_unit_random_mixups(df_bad, "1d", tz_exchange, prepost=False)
# First test - no errors left
for c in data_cols:
@@ -768,6 +671,7 @@ class TestPriceRepair(unittest.TestCase):
for interval in ['1d', '1wk']:
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.fast_info["timezone"]
hist = dat._lazy_load_price_history()
data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
_dp = os.path.dirname(__file__)
@@ -784,7 +688,7 @@ class TestPriceRepair(unittest.TestCase):
df.index = _pd.to_datetime(df.index, utc=True).tz_convert(tz_exchange)
df = df.sort_index()
df_repaired = dat._fix_unit_switch(df_bad, interval, tz_exchange)
df_repaired = hist._fix_unit_switch(df_bad, interval, tz_exchange)
df_repaired = df_repaired.sort_index()
# First test - no errors left
@@ -816,6 +720,7 @@ class TestPriceRepair(unittest.TestCase):
def test_repair_zeroes_daily(self):
tkr = "BBIL.L"
dat = yf.Ticker(tkr, session=self.session)
hist = dat._lazy_load_price_history()
tz_exchange = dat.fast_info["timezone"]
df_bad = _pd.DataFrame(data={"Open": [0, 102.04, 102.04],
@@ -831,7 +736,7 @@ class TestPriceRepair(unittest.TestCase):
df_bad.index.name = "Date"
df_bad.index = df_bad.index.tz_localize(tz_exchange)
repaired_df = dat._fix_zeroes(df_bad, "1d", tz_exchange, prepost=False)
repaired_df = hist._fix_zeroes(df_bad, "1d", tz_exchange, prepost=False)
correct_df = df_bad.copy()
correct_df.loc["2022-11-01", "Open"] = 102.080002
@@ -844,7 +749,7 @@ class TestPriceRepair(unittest.TestCase):
self.assertFalse(repaired_df["Repaired?"].isna().any())
def test_repair_zeroes_daily_adjClose(self):
# Test that 'Adj Close' is reconstructed correctly,
# Test that 'Adj Close' is reconstructed correctly,
# particularly when a dividend occurred within 1 day.
tkr = "INTC"
@@ -865,6 +770,7 @@ class TestPriceRepair(unittest.TestCase):
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.fast_info["timezone"]
df.index = df.index.tz_localize(tz_exchange)
hist = dat._lazy_load_price_history()
rtol = 5e-3
for i in [0, 1, 2]:
@@ -873,7 +779,7 @@ class TestPriceRepair(unittest.TestCase):
df_slice_bad = df_slice.copy()
df_slice_bad.loc[df_slice_bad.index[j], "Adj Close"] = 0.0
df_slice_bad_repaired = dat._fix_zeroes(df_slice_bad, "1d", tz_exchange, prepost=False)
df_slice_bad_repaired = hist._fix_zeroes(df_slice_bad, "1d", tz_exchange, prepost=False)
for c in ["Close", "Adj Close"]:
self.assertTrue(_np.isclose(df_slice_bad_repaired[c], df_slice[c], rtol=rtol).all())
self.assertTrue("Repaired?" in df_slice_bad_repaired.columns)
@@ -883,8 +789,9 @@ class TestPriceRepair(unittest.TestCase):
tkr = "INTC"
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.fast_info["timezone"]
hist = dat._lazy_load_price_history()
correct_df = dat.history(period="1wk", interval="1h", auto_adjust=False, repair=True)
correct_df = hist.history(period="1wk", interval="1h", auto_adjust=False, repair=True)
df_bad = correct_df.copy()
bad_idx = correct_df.index[10]
@@ -895,7 +802,7 @@ class TestPriceRepair(unittest.TestCase):
df_bad.loc[bad_idx, "Adj Close"] = _np.nan
df_bad.loc[bad_idx, "Volume"] = 0
repaired_df = dat._fix_zeroes(df_bad, "1h", tz_exchange, prepost=False)
repaired_df = hist._fix_zeroes(df_bad, "1h", tz_exchange, prepost=False)
for c in ["Open", "Low", "High", "Close"]:
try:
@@ -914,21 +821,22 @@ class TestPriceRepair(unittest.TestCase):
self.assertFalse(repaired_df["Repaired?"].isna().any())
def test_repair_bad_stock_split(self):
# Stocks that split in 2022 but no problems in Yahoo data,
# Stocks that split in 2022 but no problems in Yahoo data,
# so repair should change nothing
good_tkrs = ['AMZN', 'DXCM', 'FTNT', 'GOOG', 'GME', 'PANW', 'SHOP', 'TSLA']
good_tkrs += ['AEI', 'CHRA', 'GHI', 'IRON', 'LXU', 'NUZE', 'RSLS', 'TISI']
good_tkrs += ['AEI', 'GHI', 'IRON', 'LXU', 'NUZE', 'RSLS', 'TISI']
good_tkrs += ['BOL.ST', 'TUI1.DE']
intervals = ['1d', '1wk', '1mo', '3mo']
for tkr in good_tkrs:
for interval in intervals:
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.fast_info["timezone"]
hist = dat._lazy_load_price_history()
_dp = os.path.dirname(__file__)
df_good = dat.history(start='2020-01-01', end=_dt.date.today(), interval=interval, auto_adjust=False)
repaired_df = dat._fix_bad_stock_split(df_good, interval, tz_exchange)
repaired_df = hist._fix_bad_stock_split(df_good, interval, tz_exchange)
# Expect no change from repair
df_good = df_good.sort_index()
@@ -948,6 +856,7 @@ class TestPriceRepair(unittest.TestCase):
for tkr in bad_tkrs:
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.fast_info["timezone"]
hist = dat._lazy_load_price_history()
_dp = os.path.dirname(__file__)
interval = '1d'
@@ -958,7 +867,7 @@ class TestPriceRepair(unittest.TestCase):
df_bad = _pd.read_csv(fp, index_col="Date")
df_bad.index = _pd.to_datetime(df_bad.index, utc=True)
repaired_df = dat._fix_bad_stock_split(df_bad, "1d", tz_exchange)
repaired_df = hist._fix_bad_stock_split(df_bad, "1d", tz_exchange)
fp = os.path.join(_dp, "data", tkr.replace('.','-')+'-'+interval+"-bad-stock-split-fixed.csv")
correct_df = _pd.read_csv(fp, index_col="Date")
@@ -979,8 +888,8 @@ class TestPriceRepair(unittest.TestCase):
# print(repaired_df[c] - correct_df[c])
raise
# Had very high price volatility in Jan-2021 around split date that could
# be mistaken for missing stock split adjustment. And old logic did think
# Had very high price volatility in Jan-2021 around split date that could
# be mistaken for missing stock split adjustment. And old logic did think
# column 'High' required fixing - wrong!
sketchy_tkrs = ['FIZZ']
intervals = ['1wk']
@@ -988,11 +897,12 @@ class TestPriceRepair(unittest.TestCase):
for interval in intervals:
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.fast_info["timezone"]
hist = dat._lazy_load_price_history()
_dp = os.path.dirname(__file__)
df_good = dat.history(start='2020-11-30', end='2021-04-01', interval=interval, auto_adjust=False)
df_good = hist.history(start='2020-11-30', end='2021-04-01', interval=interval, auto_adjust=False)
repaired_df = dat._fix_bad_stock_split(df_good, interval, tz_exchange)
repaired_df = hist._fix_bad_stock_split(df_good, interval, tz_exchange)
# Expect no change from repair
df_good = df_good.sort_index()
@@ -1012,12 +922,13 @@ class TestPriceRepair(unittest.TestCase):
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.fast_info["timezone"]
hist = dat._lazy_load_price_history()
_dp = os.path.dirname(__file__)
df_bad = _pd.read_csv(os.path.join(_dp, "data", tkr.replace('.','-')+"-1d-missing-div-adjust.csv"), index_col="Date")
df_bad.index = _pd.to_datetime(df_bad.index)
repaired_df = dat._fix_missing_div_adjust(df_bad, "1d", tz_exchange)
repaired_df = hist._fix_missing_div_adjust(df_bad, "1d", tz_exchange)
correct_df = _pd.read_csv(os.path.join(_dp, "data", tkr.replace('.','-')+"-1d-missing-div-adjust-fixed.csv"), index_col="Date")
correct_df.index = _pd.to_datetime(correct_df.index)

View File

@@ -9,14 +9,64 @@ Specific test class:
"""
import pandas as pd
import numpy as np
from .context import yfinance as yf
from .context import session_gbl
from yfinance.exceptions import YFNotImplementedError
import unittest
import requests_cache
from typing import Union, Any, get_args, _GenericAlias
from urllib.parse import urlparse, parse_qs, urlencode, urlunparse
ticker_attributes = (
("major_holders", pd.DataFrame),
("institutional_holders", pd.DataFrame),
("mutualfund_holders", pd.DataFrame),
("insider_transactions", pd.DataFrame),
("insider_purchases", pd.DataFrame),
("insider_roster_holders", pd.DataFrame),
("splits", pd.Series),
("actions", pd.DataFrame),
("shares", pd.DataFrame),
("info", dict),
("calendar", dict),
("recommendations", Union[pd.DataFrame, dict]),
("recommendations_summary", Union[pd.DataFrame, dict]),
("upgrades_downgrades", Union[pd.DataFrame, dict]),
("earnings", pd.DataFrame),
("quarterly_earnings", pd.DataFrame),
("quarterly_cashflow", pd.DataFrame),
("cashflow", pd.DataFrame),
("quarterly_balance_sheet", pd.DataFrame),
("balance_sheet", pd.DataFrame),
("quarterly_income_stmt", pd.DataFrame),
("income_stmt", pd.DataFrame),
("analyst_price_target", pd.DataFrame),
("revenue_forecasts", pd.DataFrame),
("sustainability", pd.DataFrame),
("options", tuple),
("news", Any),
("earnings_trend", pd.DataFrame),
("earnings_dates", pd.DataFrame),
("earnings_forecasts", pd.DataFrame),
)
def assert_attribute_type(testClass: unittest.TestCase, instance, attribute_name, expected_type):
try:
attribute = getattr(instance, attribute_name)
if attribute is not None and expected_type is not Any:
err_msg = f'{attribute_name} type is {type(attribute)} not {expected_type}'
if isinstance(expected_type, _GenericAlias) and expected_type.__origin__ is Union:
allowed_types = get_args(expected_type)
testClass.assertTrue(isinstance(attribute, allowed_types), err_msg)
else:
testClass.assertEqual(type(attribute), expected_type, err_msg)
except Exception:
testClass.assertRaises(
YFNotImplementedError, lambda: getattr(instance, attribute_name)
)
class TestTicker(unittest.TestCase):
session = None
@@ -36,11 +86,11 @@ class TestTicker(unittest.TestCase):
tkrs = ["IMP.JO", "BHG.JO", "SSW.JO", "BP.L", "INTC"]
for tkr in tkrs:
# First step: remove ticker from tz-cache
yf.utils.get_tz_cache().store(tkr, None)
yf.cache.get_tz_cache().store(tkr, None)
# Test:
dat = yf.Ticker(tkr, session=self.session)
tz = dat._get_ticker_tz(proxy=None, timeout=None)
tz = dat._get_ticker_tz(proxy=None, timeout=5)
self.assertIsNotNone(tz)
@@ -61,38 +111,24 @@ class TestTicker(unittest.TestCase):
for k in dat.fast_info:
dat.fast_info[k]
dat.isin
dat.major_holders
dat.institutional_holders
dat.mutualfund_holders
dat.dividends
dat.splits
dat.actions
dat.get_shares_full()
dat.options
dat.news
dat.earnings_dates
for attribute_name, attribute_type in ticker_attributes:
assert_attribute_type(self, dat, attribute_name, attribute_type)
dat.income_stmt
dat.quarterly_income_stmt
dat.balance_sheet
dat.quarterly_balance_sheet
dat.cashflow
dat.quarterly_cashflow
with self.assertRaises(YFNotImplementedError):
assert isinstance(dat.earnings, pd.Series)
assert dat.earnings.empty
assert isinstance(dat.dividends, pd.Series)
assert dat.dividends.empty
assert isinstance(dat.splits, pd.Series)
assert dat.splits.empty
assert isinstance(dat.capital_gains, pd.Series)
assert dat.capital_gains.empty
with self.assertRaises(YFNotImplementedError):
assert isinstance(dat.shares, pd.DataFrame)
assert dat.shares.empty
assert isinstance(dat.actions, pd.DataFrame)
assert dat.actions.empty
# These haven't been ported Yahoo API
# dat.shares
# dat.info
# dat.calendar
# dat.recommendations
# dat.earnings
# dat.quarterly_earnings
# dat.recommendations_summary
# dat.analyst_price_target
# dat.revenue_forecasts
# dat.sustainability
# dat.earnings_trend
# dat.earnings_forecasts
def test_goodTicker(self):
# that yfinance works when full api is called on same instance of ticker
@@ -113,163 +149,21 @@ class TestTicker(unittest.TestCase):
for k in dat.fast_info:
dat.fast_info[k]
dat.isin
dat.major_holders
dat.institutional_holders
dat.mutualfund_holders
dat.dividends
dat.splits
dat.actions
dat.get_shares_full()
dat.options
dat.news
dat.earnings_dates
dat.income_stmt
dat.quarterly_income_stmt
dat.balance_sheet
dat.quarterly_balance_sheet
dat.cashflow
dat.quarterly_cashflow
# These require decryption which is broken:
# dat.shares
# dat.info
# dat.calendar
# dat.recommendations
# dat.earnings
# dat.quarterly_earnings
# dat.recommendations_summary
# dat.analyst_price_target
# dat.revenue_forecasts
# dat.sustainability
# dat.earnings_trend
# dat.earnings_forecasts
for attribute_name, attribute_type in ticker_attributes:
assert_attribute_type(self, dat, attribute_name, attribute_type)
def test_goodTicker_withProxy(self):
# that yfinance works when full api is called on same instance of ticker
tkr = "IBM"
dat = yf.Ticker(tkr, session=self.session)
dat = yf.Ticker(tkr, session=self.session, proxy=self.proxy)
dat._fetch_ticker_tz(proxy=self.proxy, timeout=5, debug_mode=False, raise_errors=False)
dat._get_ticker_tz(proxy=self.proxy, timeout=5, debug_mode=False, raise_errors=False)
dat.history(period="1wk", proxy=self.proxy)
v = dat.stats(proxy=self.proxy)
self.assertIsNotNone(v)
self.assertTrue(len(v) > 0)
v = dat.get_recommendations(proxy=self.proxy)
self.assertIsNotNone(v)
self.assertFalse(v.empty)
v = dat.get_calendar(proxy=self.proxy)
self.assertIsNotNone(v)
self.assertFalse(v.empty)
v = dat.get_major_holders(proxy=self.proxy)
self.assertIsNotNone(v)
self.assertFalse(v.empty)
v = dat.get_institutional_holders(proxy=self.proxy)
self.assertIsNotNone(v)
self.assertFalse(v.empty)
v = dat.get_mutualfund_holders(proxy=self.proxy)
self.assertIsNotNone(v)
self.assertFalse(v.empty)
v = dat.get_info(proxy=self.proxy)
self.assertIsNotNone(v)
self.assertTrue(len(v) > 0)
v = dat.get_sustainability(proxy=self.proxy)
self.assertIsNotNone(v)
self.assertFalse(v.empty)
v = dat.get_recommendations_summary(proxy=self.proxy)
self.assertIsNotNone(v)
self.assertFalse(v.empty)
v = dat.get_analyst_price_target(proxy=self.proxy)
self.assertIsNotNone(v)
self.assertFalse(v.empty)
v = dat.get_rev_forecast(proxy=self.proxy)
self.assertIsNotNone(v)
self.assertFalse(v.empty)
v = dat.get_earnings_forecast(proxy=self.proxy)
self.assertIsNotNone(v)
self.assertFalse(v.empty)
v = dat.get_trend_details(proxy=self.proxy)
self.assertIsNotNone(v)
self.assertFalse(v.empty)
v = dat.get_earnings_trend(proxy=self.proxy)
self.assertIsNotNone(v)
self.assertFalse(v.empty)
v = dat.get_earnings(proxy=self.proxy)
self.assertIsNotNone(v)
self.assertFalse(v.empty)
v = dat.get_income_stmt(proxy=self.proxy)
self.assertIsNotNone(v)
self.assertFalse(v.empty)
v = dat.get_incomestmt(proxy=self.proxy)
self.assertIsNotNone(v)
self.assertFalse(v.empty)
v = dat.get_financials(proxy=self.proxy)
self.assertIsNotNone(v)
self.assertFalse(v.empty)
v = dat.get_balance_sheet(proxy=self.proxy)
self.assertIsNotNone(v)
self.assertFalse(v.empty)
v = dat.get_balancesheet(proxy=self.proxy)
self.assertIsNotNone(v)
self.assertFalse(v.empty)
v = dat.get_cash_flow(proxy=self.proxy)
self.assertIsNotNone(v)
self.assertFalse(v.empty)
v = dat.get_cashflow(proxy=self.proxy)
self.assertIsNotNone(v)
self.assertFalse(v.empty)
v = dat.get_shares(proxy=self.proxy)
self.assertIsNotNone(v)
self.assertFalse(v.empty)
v = dat.get_shares_full(proxy=self.proxy)
self.assertIsNotNone(v)
self.assertFalse(v.empty)
v = dat.get_isin(proxy=self.proxy)
self.assertIsNotNone(v)
self.assertTrue(v != "")
v = dat.get_news(proxy=self.proxy)
self.assertIsNotNone(v)
self.assertTrue(len(v) > 0)
v = dat.get_earnings_dates(proxy=self.proxy)
self.assertIsNotNone(v)
self.assertFalse(v.empty)
# TODO: enable after merge
# dat.get_history_metadata(proxy=self.proxy)
# self.assertIsNotNone(v)
# self.assertTrue(len(v) > 0)
dat._fetch_ticker_tz(proxy=None, timeout=5)
dat._get_ticker_tz(proxy=None, timeout=5)
dat.history(period="1wk")
for attribute_name, attribute_type in ticker_attributes:
assert_attribute_type(self, dat, attribute_name, attribute_type)
class TestTickerHistory(unittest.TestCase):
session = None
@@ -312,16 +206,32 @@ class TestTickerHistory(unittest.TestCase):
As doing other type of scraping calls than "query2.finance.yahoo.com/v8/finance/chart" to yahoo website
will quickly trigger spam-block when doing bulk download of history data.
"""
session = requests_cache.CachedSession(backend='memory')
ticker = yf.Ticker("GOOGL", session=session)
ticker.history("1y")
actual_urls_called = tuple([r.url for r in session.cache.filter()])
session.close()
expected_urls = (
'https://query2.finance.yahoo.com/v8/finance/chart/GOOGL?events=div,splits,capitalGains&includePrePost=False&interval=1d&range=1y',
)
self.assertEqual(expected_urls, actual_urls_called, "Different than expected url used to fetch history.")
symbol = "GOOGL"
period = "1y"
with requests_cache.CachedSession(backend="memory") as session:
ticker = yf.Ticker(symbol, session=session)
ticker.history(period=period)
actual_urls_called = [r.url for r in session.cache.filter()]
# Remove 'crumb' argument
for i in range(len(actual_urls_called)):
u = actual_urls_called[i]
parsed_url = urlparse(u)
query_params = parse_qs(parsed_url.query)
query_params.pop('crumb', None)
query_params.pop('cookie', None)
u = urlunparse(parsed_url._replace(query=urlencode(query_params, doseq=True)))
actual_urls_called[i] = u
actual_urls_called = tuple(actual_urls_called)
expected_urls = (
f"https://query2.finance.yahoo.com/v8/finance/chart/{symbol}?events=div%2Csplits%2CcapitalGains&includePrePost=False&interval=1d&range={period}",
)
self.assertEqual(
expected_urls,
actual_urls_called,
"Different than expected url used to fetch history."
)
def test_dividends(self):
data = self.ticker.dividends
self.assertIsInstance(data, pd.Series, "data has wrong type")
@@ -338,76 +248,77 @@ class TestTickerHistory(unittest.TestCase):
self.assertFalse(data.empty, "data is empty")
# Below will fail because not ported to Yahoo API
# class TestTickerEarnings(unittest.TestCase):
# session = None
class TestTickerEarnings(unittest.TestCase):
session = None
# @classmethod
# def setUpClass(cls):
# cls.session = session_gbl
@classmethod
def setUpClass(cls):
cls.session = session_gbl
# @classmethod
# def tearDownClass(cls):
# if cls.session is not None:
# cls.session.close()
@classmethod
def tearDownClass(cls):
if cls.session is not None:
cls.session.close()
# def setUp(self):
# self.ticker = yf.Ticker("GOOGL", session=self.session)
def setUp(self):
self.ticker = yf.Ticker("GOOGL", session=self.session)
# def tearDown(self):
# self.ticker = None
def tearDown(self):
self.ticker = None
# def test_earnings(self):
# data = self.ticker.earnings
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
# self.assertFalse(data.empty, "data is empty")
def test_earnings_dates(self):
data = self.ticker.earnings_dates
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
# data_cached = self.ticker.earnings
# self.assertIs(data, data_cached, "data not cached")
def test_earnings_dates_with_limit(self):
# use ticker with lots of historic earnings
ticker = yf.Ticker("IBM")
limit = 110
data = ticker.get_earnings_dates(limit=limit)
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
self.assertEqual(len(data), limit, "Wrong number or rows")
# def test_quarterly_earnings(self):
# data = self.ticker.quarterly_earnings
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
# self.assertFalse(data.empty, "data is empty")
data_cached = ticker.get_earnings_dates(limit=limit)
self.assertIs(data, data_cached, "data not cached")
# data_cached = self.ticker.quarterly_earnings
# self.assertIs(data, data_cached, "data not cached")
# Below will fail because not ported to Yahoo API
# def test_earnings_forecasts(self):
# data = self.ticker.earnings_forecasts
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
# self.assertFalse(data.empty, "data is empty")
# def test_earnings(self):
# data = self.ticker.earnings
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
# self.assertFalse(data.empty, "data is empty")
# data_cached = self.ticker.earnings_forecasts
# self.assertIs(data, data_cached, "data not cached")
# data_cached = self.ticker.earnings
# self.assertIs(data, data_cached, "data not cached")
# def test_earnings_dates(self):
# data = self.ticker.earnings_dates
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
# self.assertFalse(data.empty, "data is empty")
# def test_quarterly_earnings(self):
# data = self.ticker.quarterly_earnings
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
# self.assertFalse(data.empty, "data is empty")
# data_cached = self.ticker.earnings_dates
# self.assertIs(data, data_cached, "data not cached")
# data_cached = self.ticker.quarterly_earnings
# self.assertIs(data, data_cached, "data not cached")
# def test_earnings_trend(self):
# data = self.ticker.earnings_trend
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
# self.assertFalse(data.empty, "data is empty")
# def test_earnings_forecasts(self):
# data = self.ticker.earnings_forecasts
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
# self.assertFalse(data.empty, "data is empty")
# data_cached = self.ticker.earnings_trend
# self.assertIs(data, data_cached, "data not cached")
# data_cached = self.ticker.earnings_forecasts
# self.assertIs(data, data_cached, "data not cached")
# def test_earnings_dates_with_limit(self):
# # use ticker with lots of historic earnings
# ticker = yf.Ticker("IBM")
# limit = 110
# data = ticker.get_earnings_dates(limit=limit)
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
# self.assertFalse(data.empty, "data is empty")
# self.assertEqual(len(data), limit, "Wrong number or rows")
# data_cached = self.ticker.earnings_dates
# self.assertIs(data, data_cached, "data not cached")
# data_cached = ticker.get_earnings_dates(limit=limit)
# self.assertIs(data, data_cached, "data not cached")
# def test_earnings_trend(self):
# data = self.ticker.earnings_trend
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
# self.assertFalse(data.empty, "data is empty")
# data_cached = self.ticker.earnings_trend
# self.assertIs(data, data_cached, "data not cached")
class TestTickerHolders(unittest.TestCase):
@@ -452,6 +363,30 @@ class TestTickerHolders(unittest.TestCase):
data_cached = self.ticker.mutualfund_holders
self.assertIs(data, data_cached, "data not cached")
def test_insider_transactions(self):
data = self.ticker.insider_transactions
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
data_cached = self.ticker.insider_transactions
self.assertIs(data, data_cached, "data not cached")
def test_insider_purchases(self):
data = self.ticker.insider_purchases
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
data_cached = self.ticker.insider_purchases
self.assertIs(data, data_cached, "data not cached")
def test_insider_roster_holders(self):
data = self.ticker.insider_roster_holders
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
data_cached = self.ticker.insider_roster_holders
self.assertIs(data, data_cached, "data not cached")
class TestTickerMiscFinancials(unittest.TestCase):
session = None
@@ -730,6 +665,24 @@ class TestTickerMiscFinancials(unittest.TestCase):
def test_bad_freq_value_raises_exception(self):
self.assertRaises(ValueError, lambda: self.ticker.get_cashflow(freq="badarg"))
def test_calendar(self):
data = self.ticker.calendar
self.assertIsInstance(data, dict, "data has wrong type")
self.assertTrue(len(data) > 0, "data is empty")
self.assertIn("Earnings Date", data.keys(), "data missing expected key")
self.assertIn("Earnings Average", data.keys(), "data missing expected key")
self.assertIn("Earnings Low", data.keys(), "data missing expected key")
self.assertIn("Earnings High", data.keys(), "data missing expected key")
self.assertIn("Revenue Average", data.keys(), "data missing expected key")
self.assertIn("Revenue Low", data.keys(), "data missing expected key")
self.assertIn("Revenue High", data.keys(), "data missing expected key")
# dividend date is not available for tested ticker GOOGL
if self.ticker.ticker != "GOOGL":
self.assertIn("Dividend Date", data.keys(), "data missing expected key")
# ex-dividend date is not always available
data_cached = self.ticker.calendar
self.assertIs(data, data_cached, "data not cached")
# Below will fail because not ported to Yahoo API
# def test_sustainability(self):
@@ -740,21 +693,60 @@ class TestTickerMiscFinancials(unittest.TestCase):
# data_cached = self.ticker.sustainability
# self.assertIs(data, data_cached, "data not cached")
# def test_recommendations(self):
# data = self.ticker.recommendations
# def test_shares(self):
# data = self.ticker.shares
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
# self.assertFalse(data.empty, "data is empty")
# data_cached = self.ticker.recommendations
# self.assertIs(data, data_cached, "data not cached")
# def test_recommendations_summary(self):
# data = self.ticker.recommendations_summary
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
# self.assertFalse(data.empty, "data is empty")
class TestTickerAnalysts(unittest.TestCase):
session = None
# data_cached = self.ticker.recommendations_summary
# self.assertIs(data, data_cached, "data not cached")
@classmethod
def setUpClass(cls):
cls.session = session_gbl
@classmethod
def tearDownClass(cls):
if cls.session is not None:
cls.session.close()
def setUp(self):
self.ticker = yf.Ticker("GOOGL", session=self.session)
def tearDown(self):
self.ticker = None
def test_recommendations(self):
data = self.ticker.recommendations
data_summary = self.ticker.recommendations_summary
self.assertTrue(data.equals(data_summary))
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
data_cached = self.ticker.recommendations
self.assertIs(data, data_cached, "data not cached")
def test_recommendations_summary(self): # currently alias for recommendations
data = self.ticker.recommendations_summary
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
data_cached = self.ticker.recommendations_summary
self.assertIs(data, data_cached, "data not cached")
def test_upgrades_downgrades(self):
data = self.ticker.upgrades_downgrades
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
self.assertTrue(len(data.columns) == 4, "data has wrong number of columns")
self.assertEqual(data.columns.values.tolist(), ['Firm', 'ToGrade', 'FromGrade', 'Action'], "data has wrong column names")
self.assertIsInstance(data.index, pd.DatetimeIndex, "data has wrong index type")
data_cached = self.ticker.upgrades_downgrades
self.assertIs(data, data_cached, "data not cached")
# Below will fail because not ported to Yahoo API
# def test_analyst_price_target(self):
# data = self.ticker.analyst_price_target
@@ -772,18 +764,6 @@ class TestTickerMiscFinancials(unittest.TestCase):
# data_cached = self.ticker.revenue_forecasts
# self.assertIs(data, data_cached, "data not cached")
# def test_calendar(self):
# data = self.ticker.calendar
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
# self.assertFalse(data.empty, "data is empty")
# data_cached = self.ticker.calendar
# self.assertIs(data, data_cached, "data not cached")
# def test_shares(self):
# data = self.ticker.shares
# self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
# self.assertFalse(data.empty, "data is empty")
class TestTickerInfo(unittest.TestCase):
@@ -823,6 +803,18 @@ class TestTickerInfo(unittest.TestCase):
self.assertIn("symbol", data.keys(), f"Did not find expected key '{k}' in info dict")
self.assertEqual(self.symbols[0], data["symbol"], "Wrong symbol value in info dict")
def test_complementary_info(self):
# This test is to check that we can successfully retrieve the trailing PEG ratio
# We don't expect this one to have a trailing PEG ratio
data1 = self.tickers[0].info
self.assertIsNone(data1['trailingPegRatio'])
# This one should have a trailing PEG ratio
data2 = self.tickers[2].info
self.assertIsInstance(data2['trailingPegRatio'], float)
pass
# def test_fast_info_matches_info(self):
# fast_info_keys = set()
# for ticker in self.tickers:

View File

@@ -8,20 +8,20 @@ Specific test class:
python -m unittest tests.utils.TestTicker
"""
from unittest import TestSuite
# import pandas as pd
# import numpy as np
from .context import yfinance as yf
from .context import session_gbl
import unittest
# import requests_cache
import tempfile
import os
class TestUtils(unittest.TestCase):
session = None
class TestCache(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.tempCacheDir = tempfile.TemporaryDirectory()
@@ -36,15 +36,56 @@ class TestUtils(unittest.TestCase):
tkr = 'AMZN'
tz1 = "America/New_York"
tz2 = "London/Europe"
cache = yf.utils.get_tz_cache()
cache = yf.cache.get_tz_cache()
cache.store(tkr, tz1)
cache.store(tkr, tz2)
def test_setTzCacheLocation(self):
self.assertEqual(yf.cache._TzDBManager.get_location(), self.tempCacheDir.name)
tkr = 'AMZN'
tz1 = "America/New_York"
cache = yf.cache.get_tz_cache()
cache.store(tkr, tz1)
self.assertTrue(os.path.exists(os.path.join(self.tempCacheDir.name, "tkr-tz.db")))
class TestCacheNoPermission(unittest.TestCase):
@classmethod
def setUpClass(cls):
yf.set_tz_cache_location("/root/yf-cache")
def test_tzCacheRootStore(self):
# Test that if cache path in read-only filesystem, no exception.
tkr = 'AMZN'
tz1 = "America/New_York"
# During attempt to store, will discover cannot write
yf.cache.get_tz_cache().store(tkr, tz1)
# Handling the store failure replaces cache with a dummy
cache = yf.cache.get_tz_cache()
self.assertTrue(cache.dummy)
cache.store(tkr, tz1)
def test_tzCacheRootLookup(self):
# Test that if cache path in read-only filesystem, no exception.
tkr = 'AMZN'
# During attempt to lookup, will discover cannot write
yf.cache.get_tz_cache().lookup(tkr)
# Handling the lookup failure replaces cache with a dummy
cache = yf.cache.get_tz_cache()
self.assertTrue(cache.dummy)
cache.lookup(tkr)
def suite():
suite = unittest.TestSuite()
suite.addTest(TestUtils('Test utils'))
return suite
ts: TestSuite = unittest.TestSuite()
ts.addTest(TestCache('Test cache'))
ts.addTest(TestCacheNoPermission('Test cache no permission'))
return ts
if __name__ == '__main__':

View File

@@ -23,7 +23,8 @@ from . import version
from .ticker import Ticker
from .tickers import Tickers
from .multi import download
from .utils import set_tz_cache_location, enable_debug_mode
from .utils import enable_debug_mode
from .cache import set_tz_cache_location
__version__ = version.version
__author__ = "Ran Aroussi"

File diff suppressed because it is too large Load Diff

431
yfinance/cache.py Normal file
View File

@@ -0,0 +1,431 @@
import peewee as _peewee
from threading import Lock
import os as _os
import appdirs as _ad
import atexit as _atexit
import datetime as _datetime
import pickle as _pkl
from .utils import get_yf_logger
_cache_init_lock = Lock()
# --------------
# TimeZone cache
# --------------
class _TzCacheException(Exception):
pass
class _TzCacheDummy:
"""Dummy cache to use if tz cache is disabled"""
def lookup(self, tkr):
return None
def store(self, tkr, tz):
pass
@property
def tz_db(self):
return None
class _TzCacheManager:
_tz_cache = None
@classmethod
def get_tz_cache(cls):
if cls._tz_cache is None:
with _cache_init_lock:
cls._initialise()
return cls._tz_cache
@classmethod
def _initialise(cls, cache_dir=None):
cls._tz_cache = _TzCache()
class _TzDBManager:
_db = None
_cache_dir = _os.path.join(_ad.user_cache_dir(), "py-yfinance")
@classmethod
def get_database(cls):
if cls._db is None:
cls._initialise()
return cls._db
@classmethod
def close_db(cls):
if cls._db is not None:
try:
cls._db.close()
except Exception:
# Must discard exceptions because Python trying to quit.
pass
@classmethod
def _initialise(cls, cache_dir=None):
if cache_dir is not None:
cls._cache_dir = cache_dir
if not _os.path.isdir(cls._cache_dir):
try:
_os.makedirs(cls._cache_dir)
except OSError as err:
raise _TzCacheException(f"Error creating TzCache folder: '{cls._cache_dir}' reason: {err}")
elif not (_os.access(cls._cache_dir, _os.R_OK) and _os.access(cls._cache_dir, _os.W_OK)):
raise _TzCacheException(f"Cannot read and write in TzCache folder: '{cls._cache_dir}'")
cls._db = _peewee.SqliteDatabase(
_os.path.join(cls._cache_dir, 'tkr-tz.db'),
pragmas={'journal_mode': 'wal', 'cache_size': -64}
)
old_cache_file_path = _os.path.join(cls._cache_dir, "tkr-tz.csv")
if _os.path.isfile(old_cache_file_path):
_os.remove(old_cache_file_path)
@classmethod
def set_location(cls, new_cache_dir):
if cls._db is not None:
cls._db.close()
cls._db = None
cls._cache_dir = new_cache_dir
@classmethod
def get_location(cls):
return cls._cache_dir
# close DB when Python exists
_atexit.register(_TzDBManager.close_db)
tz_db_proxy = _peewee.Proxy()
class _KV(_peewee.Model):
key = _peewee.CharField(primary_key=True)
value = _peewee.CharField(null=True)
class Meta:
database = tz_db_proxy
without_rowid = True
class _TzCache:
def __init__(self):
self.initialised = -1
self.db = None
self.dummy = False
def get_db(self):
if self.db is not None:
return self.db
try:
self.db = _TzDBManager.get_database()
except _TzCacheException as err:
get_yf_logger().info(f"Failed to create TzCache, reason: {err}. "
"TzCache will not be used. "
"Tip: You can direct cache to use a different location with 'set_tz_cache_location(mylocation)'")
self.dummy = True
return None
return self.db
def initialise(self):
if self.initialised != -1:
return
db = self.get_db()
if db is None:
self.initialised = 0 # failure
return
db.connect()
tz_db_proxy.initialize(db)
try:
db.create_tables([_KV])
except _peewee.OperationalError as e:
if 'WITHOUT' in str(e):
_KV._meta.without_rowid = False
db.create_tables([_KV])
else:
raise
self.initialised = 1 # success
def lookup(self, key):
if self.dummy:
return None
if self.initialised == -1:
self.initialise()
if self.initialised == 0: # failure
return None
try:
return _KV.get(_KV.key == key).value
except _KV.DoesNotExist:
return None
def store(self, key, value):
if self.dummy:
return
if self.initialised == -1:
self.initialise()
if self.initialised == 0: # failure
return
db = self.get_db()
if db is None:
return
try:
if value is None:
q = _KV.delete().where(_KV.key == key)
q.execute()
return
with db.atomic():
_KV.insert(key=key, value=value).execute()
except _peewee.IntegrityError:
# Integrity error means the key already exists. Try updating the key.
old_value = self.lookup(key)
if old_value != value:
get_yf_logger().debug(f"Value for key {key} changed from {old_value} to {value}.")
with db.atomic():
q = _KV.update(value=value).where(_KV.key == key)
q.execute()
def get_tz_cache():
return _TzCacheManager.get_tz_cache()
# --------------
# Cookie cache
# --------------
class _CookieCacheException(Exception):
pass
class _CookieCacheDummy:
"""Dummy cache to use if Cookie cache is disabled"""
def lookup(self, tkr):
return None
def store(self, tkr, Cookie):
pass
@property
def Cookie_db(self):
return None
class _CookieCacheManager:
_Cookie_cache = None
@classmethod
def get_cookie_cache(cls):
if cls._Cookie_cache is None:
with _cache_init_lock:
cls._initialise()
return cls._Cookie_cache
@classmethod
def _initialise(cls, cache_dir=None):
cls._Cookie_cache = _CookieCache()
class _CookieDBManager:
_db = None
_cache_dir = _os.path.join(_ad.user_cache_dir(), "py-yfinance")
@classmethod
def get_database(cls):
if cls._db is None:
cls._initialise()
return cls._db
@classmethod
def close_db(cls):
if cls._db is not None:
try:
cls._db.close()
except Exception:
# Must discard exceptions because Python trying to quit.
pass
@classmethod
def _initialise(cls, cache_dir=None):
if cache_dir is not None:
cls._cache_dir = cache_dir
if not _os.path.isdir(cls._cache_dir):
try:
_os.makedirs(cls._cache_dir)
except OSError as err:
raise _CookieCacheException(f"Error creating CookieCache folder: '{cls._cache_dir}' reason: {err}")
elif not (_os.access(cls._cache_dir, _os.R_OK) and _os.access(cls._cache_dir, _os.W_OK)):
raise _CookieCacheException(f"Cannot read and write in CookieCache folder: '{cls._cache_dir}'")
cls._db = _peewee.SqliteDatabase(
_os.path.join(cls._cache_dir, 'cookies.db'),
pragmas={'journal_mode': 'wal', 'cache_size': -64}
)
@classmethod
def set_location(cls, new_cache_dir):
if cls._db is not None:
cls._db.close()
cls._db = None
cls._cache_dir = new_cache_dir
@classmethod
def get_location(cls):
return cls._cache_dir
# close DB when Python exists
_atexit.register(_CookieDBManager.close_db)
Cookie_db_proxy = _peewee.Proxy()
class ISODateTimeField(_peewee.DateTimeField):
# Ensure Python datetime is read & written correctly for sqlite,
# because user discovered peewee allowed an invalid datetime
# to get written.
def db_value(self, value):
if value and isinstance(value, _datetime.datetime):
return value.isoformat()
return super().db_value(value)
def python_value(self, value):
if value and isinstance(value, str) and 'T' in value:
return _datetime.datetime.fromisoformat(value)
return super().python_value(value)
class _CookieSchema(_peewee.Model):
strategy = _peewee.CharField(primary_key=True)
fetch_date = ISODateTimeField(default=_datetime.datetime.now)
# Which cookie type depends on strategy
cookie_bytes = _peewee.BlobField()
class Meta:
database = Cookie_db_proxy
without_rowid = True
class _CookieCache:
def __init__(self):
self.initialised = -1
self.db = None
self.dummy = False
def get_db(self):
if self.db is not None:
return self.db
try:
self.db = _CookieDBManager.get_database()
except _CookieCacheException as err:
get_yf_logger().info(f"Failed to create CookieCache, reason: {err}. "
"CookieCache will not be used. "
"Tip: You can direct cache to use a different location with 'set_tz_cache_location(mylocation)'")
self.dummy = True
return None
return self.db
def initialise(self):
if self.initialised != -1:
return
db = self.get_db()
if db is None:
self.initialised = 0 # failure
return
db.connect()
Cookie_db_proxy.initialize(db)
try:
db.create_tables([_CookieSchema])
except _peewee.OperationalError as e:
if 'WITHOUT' in str(e):
_CookieSchema._meta.without_rowid = False
db.create_tables([_CookieSchema])
else:
raise
self.initialised = 1 # success
def lookup(self, strategy):
if self.dummy:
return None
if self.initialised == -1:
self.initialise()
if self.initialised == 0: # failure
return None
try:
data = _CookieSchema.get(_CookieSchema.strategy == strategy)
cookie = _pkl.loads(data.cookie_bytes)
return {'cookie':cookie, 'age':_datetime.datetime.now()-data.fetch_date}
except _CookieSchema.DoesNotExist:
return None
def store(self, strategy, cookie):
if self.dummy:
return
if self.initialised == -1:
self.initialise()
if self.initialised == 0: # failure
return
db = self.get_db()
if db is None:
return
try:
q = _CookieSchema.delete().where(_CookieSchema.strategy == strategy)
q.execute()
if cookie is None:
return
with db.atomic():
cookie_pkl = _pkl.dumps(cookie, _pkl.HIGHEST_PROTOCOL)
_CookieSchema.insert(strategy=strategy, cookie_bytes=cookie_pkl).execute()
except _peewee.IntegrityError:
raise
# # Integrity error means the strategy already exists. Try updating the strategy.
# old_value = self.lookup(strategy)
# if old_value != cookie:
# get_yf_logger().debug(f"cookie for strategy {strategy} changed from {old_value} to {cookie}.")
# with db.atomic():
# q = _CookieSchema.update(cookie=cookie).where(_CookieSchema.strategy == strategy)
# q.execute()
def get_cookie_cache():
return _CookieCacheManager.get_cookie_cache()
def set_cache_location(cache_dir: str):
"""
Sets the path to create the "py-yfinance" cache folder in.
Useful if the default folder returned by "appdir.user_cache_dir()" is not writable.
Must be called before cache is used (that is, before fetching tickers).
:param cache_dir: Path to use for caches
:return: None
"""
_TzDBManager.set_location(cache_dir)
_CookieDBManager.set_location(cache_dir)
def set_tz_cache_location(cache_dir: str):
set_cache_location(cache_dir)

View File

@@ -115,4 +115,40 @@ fundamentals_keys = {
"PaymentstoSuppliersforGoodsandServices", "ClassesofCashReceiptsfromOperatingActivities",
"OtherCashReceiptsfromOperatingActivities", "ReceiptsfromGovernmentGrants", "ReceiptsfromCustomers"]}
price_colnames = ['Open', 'High', 'Low', 'Close', 'Adj Close']
_PRICE_COLNAMES_ = ['Open', 'High', 'Low', 'Close', 'Adj Close']
quote_summary_valid_modules = (
"summaryProfile", # contains general information about the company
"summaryDetail", # prices + volume + market cap + etc
"assetProfile", # summaryProfile + company officers
"fundProfile",
"price", # current prices
"quoteType", # quoteType
"esgScores", # Environmental, social, and governance (ESG) scores, sustainability and ethical performance of companies
"incomeStatementHistory",
"incomeStatementHistoryQuarterly",
"balanceSheetHistory",
"balanceSheetHistoryQuarterly",
"cashFlowStatementHistory",
"cashFlowStatementHistoryQuarterly",
"defaultKeyStatistics", # KPIs (PE, enterprise value, EPS, EBITA, and more)
"financialData", # Financial KPIs (revenue, gross margins, operating cash flow, free cash flow, and more)
"calendarEvents", # future earnings date
"secFilings", # SEC filings, such as 10K and 10Q reports
"upgradeDowngradeHistory", # upgrades and downgrades that analysts have given a company's stock
"institutionOwnership", # institutional ownership, holders and shares outstanding
"fundOwnership", # mutual fund ownership, holders and shares outstanding
"majorDirectHolders",
"majorHoldersBreakdown",
"insiderTransactions", # insider transactions, such as the number of shares bought and sold by company executives
"insiderHolders", # insider holders, such as the number of shares held by company executives
"netSharePurchaseActivity", # net share purchase activity, such as the number of shares bought and sold by company executives
"earnings", # earnings history
"earningsHistory",
"earningsTrend", # earnings trend
"industryTrend",
"indexTrend",
"sectorTrend",
"recommendationTrend",
"futuresChain",
)

View File

@@ -1,16 +1,14 @@
import functools
from functools import lru_cache
import logging
import requests as requests
import re
import random
import time
from bs4 import BeautifulSoup
import datetime
from frozendict import frozendict
from . import utils
from . import utils, cache
import threading
cache_maxsize = 64
@@ -36,25 +34,351 @@ def lru_cache_freezeargs(func):
return wrapped
class TickerData:
class SingletonMeta(type):
"""
Have one place to retrieve data from Yahoo API in order to ease caching and speed up operations
Metaclass that creates a Singleton instance.
"""
_instances = {}
_lock = threading.Lock()
def __call__(cls, *args, **kwargs):
with cls._lock:
if cls not in cls._instances:
instance = super().__call__(*args, **kwargs)
cls._instances[cls] = instance
else:
cls._instances[cls]._set_session(*args, **kwargs)
return cls._instances[cls]
class YfData(metaclass=SingletonMeta):
"""
Have one place to retrieve data from Yahoo API in order to ease caching and speed up operations.
Singleton means one session one cookie shared by all threads.
"""
user_agent_headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
def __init__(self, ticker: str, session=None):
self.ticker = ticker
self._session = session or requests
def __init__(self, session=None):
self._session = session or requests.Session()
def get(self, url, user_agent_headers=None, params=None, proxy=None, timeout=30):
proxy = self._get_proxy(proxy)
try:
self._session.cache
except AttributeError:
# Not caching
self._session_is_caching = False
else:
# Is caching. This is annoying.
# Can't simply use a non-caching session to fetch cookie & crumb,
# because then the caching-session won't have cookie.
self._session_is_caching = True
from requests_cache import DO_NOT_CACHE
self._expire_after = DO_NOT_CACHE
self._crumb = None
self._cookie = None
if self._session_is_caching and self._cookie is None:
utils.print_once("WARNING: cookie & crumb does not work well with requests_cache. Am experimenting with 'expire_after=DO_NOT_CACHE', but you need to help stress-test.")
# Default to using 'basic' strategy
self._cookie_strategy = 'basic'
# If it fails, then fallback method is 'csrf'
# self._cookie_strategy = 'csrf'
self._cookie_lock = threading.Lock()
def _set_session(self, session):
if session is None:
return
with self._cookie_lock:
self._session = session
def _set_cookie_strategy(self, strategy, have_lock=False):
if strategy == self._cookie_strategy:
return
if not have_lock:
self._cookie_lock.acquire()
try:
if self._cookie_strategy == 'csrf':
utils.get_yf_logger().debug(f'toggling cookie strategy {self._cookie_strategy} -> basic')
self._session.cookies.clear()
self._cookie_strategy = 'basic'
else:
utils.get_yf_logger().debug(f'toggling cookie strategy {self._cookie_strategy} -> csrf')
self._cookie_strategy = 'csrf'
self._cookie = None
self._crumb = None
except Exception:
self._cookie_lock.release()
raise
if not have_lock:
self._cookie_lock.release()
def _save_session_cookies(self):
try:
cache.get_cookie_cache().store('csrf', self._session.cookies)
except Exception:
return False
return True
def _load_session_cookies(self):
cookie_dict = cache.get_cookie_cache().lookup('csrf')
if cookie_dict is None:
return False
# Periodically refresh, 24 hours seems fair.
if cookie_dict['age'] > datetime.timedelta(days=1):
return False
self._session.cookies.update(cookie_dict['cookie'])
utils.get_yf_logger().debug('loaded persistent cookie')
def _save_cookie_basic(self, cookie):
try:
cache.get_cookie_cache().store('basic', cookie)
except Exception:
return False
return True
def _load_cookie_basic(self):
cookie_dict = cache.get_cookie_cache().lookup('basic')
if cookie_dict is None:
return None
# Periodically refresh, 24 hours seems fair.
if cookie_dict['age'] > datetime.timedelta(days=1):
return None
utils.get_yf_logger().debug('loaded persistent cookie')
return cookie_dict['cookie']
def _get_cookie_basic(self, proxy=None, timeout=30):
if self._cookie is not None:
utils.get_yf_logger().debug('reusing cookie')
return self._cookie
self._cookie = self._load_cookie_basic()
if self._cookie is not None:
return self._cookie
# To avoid infinite recursion, do NOT use self.get()
# - 'allow_redirects' copied from @psychoz971 solution - does it help USA?
response = self._session.get(
url=url,
params=params,
url='https://fc.yahoo.com',
headers=self.user_agent_headers,
proxies=proxy,
timeout=timeout,
headers=user_agent_headers or self.user_agent_headers)
allow_redirects=True)
if not response.cookies:
utils.get_yf_logger().debug("response.cookies = None")
return None
self._cookie = list(response.cookies)[0]
if self._cookie == '':
utils.get_yf_logger().debug("list(response.cookies)[0] = ''")
return None
self._save_cookie_basic(self._cookie)
utils.get_yf_logger().debug(f"fetched basic cookie = {self._cookie}")
return self._cookie
def _get_crumb_basic(self, proxy=None, timeout=30):
if self._crumb is not None:
utils.get_yf_logger().debug('reusing crumb')
return self._crumb
cookie = self._get_cookie_basic()
if cookie is None:
return None
# - 'allow_redirects' copied from @psychoz971 solution - does it help USA?
get_args = {
'url': "https://query1.finance.yahoo.com/v1/test/getcrumb",
'headers': self.user_agent_headers,
'cookies': {cookie.name: cookie.value},
'proxies': proxy,
'timeout': timeout,
'allow_redirects': True
}
if self._session_is_caching:
get_args['expire_after'] = self._expire_after
crumb_response = self._session.get(**get_args)
else:
crumb_response = self._session.get(**get_args)
self._crumb = crumb_response.text
if self._crumb is None or '<html>' in self._crumb:
utils.get_yf_logger().debug("Didn't receive crumb")
return None
utils.get_yf_logger().debug(f"crumb = '{self._crumb}'")
return self._crumb
@utils.log_indent_decorator
def _get_cookie_and_crumb_basic(self, proxy, timeout):
cookie = self._get_cookie_basic(proxy, timeout)
crumb = self._get_crumb_basic(proxy, timeout)
return cookie, crumb
def _get_cookie_csrf(self, proxy, timeout):
if self._cookie is not None:
utils.get_yf_logger().debug('reusing cookie')
return True
elif self._load_session_cookies():
utils.get_yf_logger().debug('reusing persistent cookie')
self._cookie = True
return True
base_args = {
'headers': self.user_agent_headers,
'proxies': proxy,
'timeout': timeout}
get_args = {**base_args, 'url': 'https://guce.yahoo.com/consent'}
if self._session_is_caching:
get_args['expire_after'] = self._expire_after
response = self._session.get(**get_args)
else:
response = self._session.get(**get_args)
soup = BeautifulSoup(response.content, 'html.parser')
csrfTokenInput = soup.find('input', attrs={'name': 'csrfToken'})
if csrfTokenInput is None:
utils.get_yf_logger().debug('Failed to find "csrfToken" in response')
return False
csrfToken = csrfTokenInput['value']
utils.get_yf_logger().debug(f'csrfToken = {csrfToken}')
sessionIdInput = soup.find('input', attrs={'name': 'sessionId'})
sessionId = sessionIdInput['value']
utils.get_yf_logger().debug(f"sessionId='{sessionId}")
originalDoneUrl = 'https://finance.yahoo.com/'
namespace = 'yahoo'
data = {
'agree': ['agree', 'agree'],
'consentUUID': 'default',
'sessionId': sessionId,
'csrfToken': csrfToken,
'originalDoneUrl': originalDoneUrl,
'namespace': namespace,
}
post_args = {**base_args,
'url': f'https://consent.yahoo.com/v2/collectConsent?sessionId={sessionId}',
'data': data}
get_args = {**base_args,
'url': f'https://guce.yahoo.com/copyConsent?sessionId={sessionId}',
'data': data}
if self._session_is_caching:
post_args['expire_after'] = self._expire_after
get_args['expire_after'] = self._expire_after
self._session.post(**post_args)
self._session.get(**get_args)
else:
self._session.post(**post_args)
self._session.get(**get_args)
self._cookie = True
self._save_session_cookies()
return True
@utils.log_indent_decorator
def _get_crumb_csrf(self, proxy=None, timeout=30):
# Credit goes to @bot-unit #1729
if self._crumb is not None:
utils.get_yf_logger().debug('reusing crumb')
return self._crumb
if not self._get_cookie_csrf(proxy, timeout):
# This cookie stored in session
return None
get_args = {
'url': 'https://query2.finance.yahoo.com/v1/test/getcrumb',
'headers': self.user_agent_headers,
'proxies': proxy,
'timeout': timeout}
if self._session_is_caching:
get_args['expire_after'] = self._expire_after
r = self._session.get(**get_args)
else:
r = self._session.get(**get_args)
self._crumb = r.text
if self._crumb is None or '<html>' in self._crumb or self._crumb == '':
utils.get_yf_logger().debug("Didn't receive crumb")
return None
utils.get_yf_logger().debug(f"crumb = '{self._crumb}'")
return self._crumb
@utils.log_indent_decorator
def _get_cookie_and_crumb(self, proxy=None, timeout=30):
cookie, crumb, strategy = None, None, None
utils.get_yf_logger().debug(f"cookie_mode = '{self._cookie_strategy}'")
with self._cookie_lock:
if self._cookie_strategy == 'csrf':
crumb = self._get_crumb_csrf()
if crumb is None:
# Fail
self._set_cookie_strategy('basic', have_lock=True)
cookie, crumb = self._get_cookie_and_crumb_basic(proxy, timeout)
else:
# Fallback strategy
cookie, crumb = self._get_cookie_and_crumb_basic(proxy, timeout)
if cookie is None or crumb is None:
# Fail
self._set_cookie_strategy('csrf', have_lock=True)
crumb = self._get_crumb_csrf()
strategy = self._cookie_strategy
return cookie, crumb, strategy
@utils.log_indent_decorator
def get(self, url, user_agent_headers=None, params=None, proxy=None, timeout=30):
# Important: treat input arguments as immutable.
if len(url) > 200:
utils.get_yf_logger().debug(f'url={url[:200]}...')
else:
utils.get_yf_logger().debug(f'url={url}')
utils.get_yf_logger().debug(f'params={params}')
proxy = self._get_proxy(proxy)
if params is None:
params = {}
if 'crumb' in params:
raise Exception("Don't manually add 'crumb' to params dict, let data.py handle it")
cookie, crumb, strategy = self._get_cookie_and_crumb()
if crumb is not None:
crumbs = {'crumb': crumb}
else:
crumbs = {}
if strategy == 'basic' and cookie is not None:
# Basic cookie strategy adds cookie to GET parameters
cookies = {cookie.name: cookie.value}
else:
cookies = None
request_args = {
'url': url,
'params': {**params, **crumbs},
'cookies': cookies,
'proxies': proxy,
'timeout': timeout,
'headers': user_agent_headers or self.user_agent_headers
}
response = self._session.get(**request_args)
utils.get_yf_logger().debug(f'response code={response.status_code}')
if response.status_code >= 400:
# Retry with other cookie strategy
if strategy == 'basic':
self._set_cookie_strategy('csrf')
else:
self._set_cookie_strategy('basic')
cookie, crumb, strategy = self._get_cookie_and_crumb(proxy, timeout)
request_args['params']['crumb'] = crumb
if strategy == 'basic':
request_args['cookies'] = {cookie.name: cookie.value}
response = self._session.get(**request_args)
utils.get_yf_logger().debug(f'response code={response.status_code}')
return response
@lru_cache_freezeargs
@@ -71,6 +395,7 @@ class TickerData:
return proxy
def get_raw_json(self, url, user_agent_headers=None, params=None, proxy=None, timeout=30):
utils.get_yf_logger().debug(f'get_raw_json(): {url}')
response = self.get(url, user_agent_headers=user_agent_headers, params=params, proxy=proxy, timeout=timeout)
response.raise_for_status()
return response.json()

View File

@@ -29,6 +29,7 @@ import multitasking as _multitasking
import pandas as _pd
from . import Ticker, utils
from .data import YfData
from . import shared
@@ -143,6 +144,9 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
shared._ERRORS = {}
shared._TRACEBACKS = {}
# Ensure data initialised with session.
YfData(session=session)
# download using threads
if threads:
if threads is True:
@@ -154,7 +158,7 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
actions=actions, auto_adjust=auto_adjust,
back_adjust=back_adjust, repair=repair, keepna=keepna,
progress=(progress and i > 0), proxy=proxy,
rounding=rounding, timeout=timeout, session=session)
rounding=rounding, timeout=timeout)
while len(shared._DFS) < len(tickers):
_time.sleep(0.01)
# download synchronously
@@ -165,10 +169,10 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
actions=actions, auto_adjust=auto_adjust,
back_adjust=back_adjust, repair=repair, keepna=keepna,
proxy=proxy,
rounding=rounding, timeout=timeout, session=session)
rounding=rounding, timeout=timeout)
if progress:
shared._PROGRESS_BAR.animate()
if progress:
shared._PROGRESS_BAR.completed()
@@ -213,12 +217,12 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
try:
data = _pd.concat(shared._DFS.values(), axis=1, sort=True,
keys=shared._DFS.keys())
keys=shared._DFS.keys(), names=['Ticker', 'Price'])
except Exception:
_realign_dfs()
data = _pd.concat(shared._DFS.values(), axis=1, sort=True,
keys=shared._DFS.keys())
keys=shared._DFS.keys(), names=['Ticker', 'Price'])
data.index = _pd.to_datetime(data.index)
# switch names back to isins if applicable
data.rename(columns=shared._ISINS, inplace=True)
@@ -257,10 +261,10 @@ def _download_one_threaded(ticker, start=None, end=None,
auto_adjust=False, back_adjust=False, repair=False,
actions=False, progress=True, period="max",
interval="1d", prepost=False, proxy=None,
keepna=False, rounding=False, timeout=10, session=None):
data = _download_one(ticker, start, end, auto_adjust, back_adjust, repair,
keepna=False, rounding=False, timeout=10):
_download_one(ticker, start, end, auto_adjust, back_adjust, repair,
actions, period, interval, prepost, proxy, rounding,
keepna, timeout, session)
keepna, timeout)
if progress:
shared._PROGRESS_BAR.animate()
@@ -269,10 +273,10 @@ def _download_one(ticker, start=None, end=None,
auto_adjust=False, back_adjust=False, repair=False,
actions=False, period="max", interval="1d",
prepost=False, proxy=None, rounding=False,
keepna=False, timeout=10, session=None):
keepna=False, timeout=10):
data = None
try:
data = Ticker(ticker, session=session).history(
data = Ticker(ticker).history(
period=period, interval=interval,
start=start, end=end, prepost=prepost,
actions=actions, auto_adjust=auto_adjust,

View File

@@ -1,14 +1,14 @@
import pandas as pd
from yfinance import utils
from yfinance.data import TickerData
from yfinance.data import YfData
from yfinance.exceptions import YFNotImplementedError
class Analysis:
def __init__(self, data: TickerData, proxy=None):
def __init__(self, data: YfData, symbol: str, proxy=None):
self._data = data
self._symbol = symbol
self.proxy = proxy
self._earnings_trend = None

View File

@@ -4,14 +4,15 @@ import json
import pandas as pd
from yfinance import utils, const
from yfinance.data import TickerData
from yfinance.data import YfData
from yfinance.exceptions import YFinanceException, YFNotImplementedError
class Fundamentals:
def __init__(self, data: TickerData, proxy=None):
def __init__(self, data: YfData, symbol: str, proxy=None):
self._data = data
self._symbol = symbol
self.proxy = proxy
self._earnings = None
@@ -21,7 +22,7 @@ class Fundamentals:
self._financials_data = None
self._fin_data_quote = None
self._basics_already_scraped = False
self._financials = Financials(data)
self._financials = Financials(data, symbol)
@property
def financials(self) -> "Financials":
@@ -41,8 +42,9 @@ class Fundamentals:
class Financials:
def __init__(self, data: TickerData):
def __init__(self, data: YfData, symbol: str):
self._data = data
self._symbol = symbol
self._income_time_series = {}
self._balance_sheet_time_series = {}
self._cash_flow_time_series = {}
@@ -77,7 +79,7 @@ class Financials:
if name not in allowed_names:
raise ValueError(f"Illegal argument: name must be one of: {allowed_names}")
if timescale not in allowed_timescales:
raise ValueError(f"Illegal argument: timescale must be one of: {allowed_names}")
raise ValueError(f"Illegal argument: timescale must be one of: {allowed_timescales}")
try:
statement = self._create_financials_table(name, timescale, proxy)
@@ -85,7 +87,7 @@ class Financials:
if statement is not None:
return statement
except YFinanceException as e:
utils.get_yf_logger().error(f"{self._data.ticker}: Failed to create {name} financials table for reason: {e}")
utils.get_yf_logger().error(f"{self._symbol}: Failed to create {name} financials table for reason: {e}")
return pd.DataFrame()
def _create_financials_table(self, name, timescale, proxy):
@@ -97,7 +99,7 @@ class Financials:
try:
return self.get_financials_time_series(timescale, keys, proxy)
except Exception as e:
except Exception:
pass
def get_financials_time_series(self, timescale, keys: list, proxy=None) -> pd.DataFrame:
@@ -105,7 +107,7 @@ class Financials:
timescale = timescale_translation[timescale]
# Step 2: construct url:
ts_url_base = f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{self._data.ticker}?symbol={self._data.ticker}"
ts_url_base = f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{self._symbol}?symbol={self._symbol}"
url = ts_url_base + "&type=" + ",".join([timescale + k for k in keys])
# Yahoo returns maximum 4 years or 5 quarters, regardless of start_dt:
start_dt = datetime.datetime(2016, 12, 31)

1630
yfinance/scrapers/history.py Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -1,67 +1,246 @@
import pandas as pd
# from io import StringIO
from yfinance.data import TickerData
import pandas as pd
import requests
from yfinance import utils
from yfinance.data import YfData
from yfinance.const import _BASE_URL_
from yfinance.exceptions import YFinanceDataException
_QUOTE_SUMMARY_URL_ = f"{_BASE_URL_}/v10/finance/quoteSummary/"
class Holders:
_SCRAPE_URL_ = 'https://finance.yahoo.com/quote'
def __init__(self, data: TickerData, proxy=None):
def __init__(self, data: YfData, symbol: str, proxy=None):
self._data = data
self._symbol = symbol
self.proxy = proxy
self._major = None
self._major_direct_holders = None
self._institutional = None
self._mutualfund = None
self._insider_transactions = None
self._insider_purchases = None
self._insider_roster = None
@property
def major(self) -> pd.DataFrame:
if self._major is None:
self._scrape(self.proxy)
# self._scrape(self.proxy)
self._fetch_and_parse()
return self._major
@property
def institutional(self) -> pd.DataFrame:
if self._institutional is None:
self._scrape(self.proxy)
# self._scrape(self.proxy)
self._fetch_and_parse()
return self._institutional
@property
def mutualfund(self) -> pd.DataFrame:
if self._mutualfund is None:
self._scrape(self.proxy)
# self._scrape(self.proxy)
self._fetch_and_parse()
return self._mutualfund
def _scrape(self, proxy):
ticker_url = f"{self._SCRAPE_URL_}/{self._data.ticker}"
@property
def insider_transactions(self) -> pd.DataFrame:
if self._insider_transactions is None:
# self._scrape_insider_transactions(self.proxy)
self._fetch_and_parse()
return self._insider_transactions
@property
def insider_purchases(self) -> pd.DataFrame:
if self._insider_purchases is None:
# self._scrape_insider_transactions(self.proxy)
self._fetch_and_parse()
return self._insider_purchases
@property
def insider_roster(self) -> pd.DataFrame:
if self._insider_roster is None:
# self._scrape_insider_ros(self.proxy)
self._fetch_and_parse()
return self._insider_roster
def _fetch(self, proxy):
modules = ','.join(
["institutionOwnership", "fundOwnership", "majorDirectHolders", "majorHoldersBreakdown", "insiderTransactions", "insiderHolders", "netSharePurchaseActivity"])
params_dict = {"modules": modules, "corsDomain": "finance.yahoo.com", "formatted": "false"}
result = self._data.get_raw_json(f"{_QUOTE_SUMMARY_URL_}/{self._symbol}", user_agent_headers=self._data.user_agent_headers, params=params_dict, proxy=proxy)
return result
def _fetch_and_parse(self):
try:
resp = self._data.cache_get(ticker_url + '/holders', proxy=proxy)
holders = pd.read_html(resp.text)
except Exception:
holders = []
result = self._fetch(self.proxy)
except requests.exceptions.HTTPError as e:
utils.get_yf_logger().error(str(e))
if len(holders) >= 3:
self._major = holders[0]
self._institutional = holders[1]
self._mutualfund = holders[2]
elif len(holders) >= 2:
self._major = holders[0]
self._institutional = holders[1]
elif len(holders) >= 1:
self._major = holders[0]
self._major = pd.DataFrame()
self._major_direct_holders = pd.DataFrame()
self._institutional = pd.DataFrame()
self._mutualfund = pd.DataFrame()
self._insider_transactions = pd.DataFrame()
self._insider_purchases = pd.DataFrame()
self._insider_roster = pd.DataFrame()
if self._institutional is not None:
if 'Date Reported' in self._institutional:
self._institutional['Date Reported'] = pd.to_datetime(
self._institutional['Date Reported'])
if '% Out' in self._institutional:
self._institutional['% Out'] = self._institutional[
'% Out'].str.replace('%', '').astype(float) / 100
return
if self._mutualfund is not None:
if 'Date Reported' in self._mutualfund:
self._mutualfund['Date Reported'] = pd.to_datetime(
self._mutualfund['Date Reported'])
if '% Out' in self._mutualfund:
self._mutualfund['% Out'] = self._mutualfund[
'% Out'].str.replace('%', '').astype(float) / 100
try:
data = result["quoteSummary"]["result"][0]
# parse "institutionOwnership", "fundOwnership", "majorDirectHolders", "majorHoldersBreakdown", "insiderTransactions", "insiderHolders", "netSharePurchaseActivity"
self._parse_institution_ownership(data["institutionOwnership"])
self._parse_fund_ownership(data["fundOwnership"])
# self._parse_major_direct_holders(data["majorDirectHolders"]) # need more data to investigate
self._parse_major_holders_breakdown(data["majorHoldersBreakdown"])
self._parse_insider_transactions(data["insiderTransactions"])
self._parse_insider_holders(data["insiderHolders"])
self._parse_net_share_purchase_activity(data["netSharePurchaseActivity"])
except (KeyError, IndexError):
raise YFinanceDataException("Failed to parse holders json data.")
@staticmethod
def _parse_raw_values(data):
if isinstance(data, dict) and "raw" in data:
return data["raw"]
return data
def _parse_institution_ownership(self, data):
holders = data["ownershipList"]
for owner in holders:
for k, v in owner.items():
owner[k] = self._parse_raw_values(v)
del owner["maxAge"]
df = pd.DataFrame(holders)
if not df.empty:
df["reportDate"] = pd.to_datetime(df["reportDate"], unit="s")
df.rename(columns={"reportDate": "Date Reported", "organization": "Holder", "position": "Shares", "value": "Value"}, inplace=True) # "pctHeld": "% Out"
self._institutional = df
def _parse_fund_ownership(self, data):
holders = data["ownershipList"]
for owner in holders:
for k, v in owner.items():
owner[k] = self._parse_raw_values(v)
del owner["maxAge"]
df = pd.DataFrame(holders)
if not df.empty:
df["reportDate"] = pd.to_datetime(df["reportDate"], unit="s")
df.rename(columns={"reportDate": "Date Reported", "organization": "Holder", "position": "Shares", "value": "Value"}, inplace=True)
self._mutualfund = df
def _parse_major_direct_holders(self, data):
holders = data["holders"]
for owner in holders:
for k, v in owner.items():
owner[k] = self._parse_raw_values(v)
del owner["maxAge"]
df = pd.DataFrame(holders)
if not df.empty:
df["reportDate"] = pd.to_datetime(df["reportDate"], unit="s")
df.rename(columns={"reportDate": "Date Reported", "organization": "Holder", "positionDirect": "Shares", "valueDirect": "Value"}, inplace=True)
self._major_direct_holders = df
def _parse_major_holders_breakdown(self, data):
if "maxAge" in data:
del data["maxAge"]
df = pd.DataFrame.from_dict(data, orient="index")
if not df.empty:
df.columns.name = "Breakdown"
df.rename(columns={df.columns[0]: 'Value'}, inplace=True)
self._major = df
def _parse_insider_transactions(self, data):
holders = data["transactions"]
for owner in holders:
for k, v in owner.items():
owner[k] = self._parse_raw_values(v)
del owner["maxAge"]
df = pd.DataFrame(holders)
if not df.empty:
df["startDate"] = pd.to_datetime(df["startDate"], unit="s")
df.rename(columns={
"startDate": "Start Date",
"filerName": "Insider",
"filerRelation": "Position",
"filerUrl": "URL",
"moneyText": "Transaction",
"transactionText": "Text",
"shares": "Shares",
"value": "Value",
"ownership": "Ownership" # ownership flag, direct or institutional
}, inplace=True)
self._insider_transactions = df
def _parse_insider_holders(self, data):
holders = data["holders"]
for owner in holders:
for k, v in owner.items():
owner[k] = self._parse_raw_values(v)
del owner["maxAge"]
df = pd.DataFrame(holders)
if not df.empty:
df["positionDirectDate"] = pd.to_datetime(df["positionDirectDate"], unit="s")
df["latestTransDate"] = pd.to_datetime(df["latestTransDate"], unit="s")
df.rename(columns={
"name": "Name",
"relation": "Position",
"url": "URL",
"transactionDescription": "Most Recent Transaction",
"latestTransDate": "Latest Transaction Date",
"positionDirectDate": "Position Direct Date",
"positionDirect": "Shares Owned Directly",
"positionIndirectDate": "Position Indirect Date",
"positionIndirect": "Shares Owned Indirectly"
}, inplace=True)
df["Name"] = df["Name"].astype(str)
df["Position"] = df["Position"].astype(str)
df["URL"] = df["URL"].astype(str)
df["Most Recent Transaction"] = df["Most Recent Transaction"].astype(str)
self._insider_roster = df
def _parse_net_share_purchase_activity(self, data):
df = pd.DataFrame(
{
"Insider Purchases Last " + data.get("period", ""): [
"Purchases",
"Sales",
"Net Shares Purchased (Sold)",
"Total Insider Shares Held",
"% Net Shares Purchased (Sold)",
"% Buy Shares",
"% Sell Shares"
],
"Shares": [
data.get('buyInfoShares'),
data.get('sellInfoShares'),
data.get('netInfoShares'),
data.get('totalInsiderShares'),
data.get('netPercentInsiderShares'),
data.get('buyPercentInsiderShares'),
data.get('sellPercentInsiderShares')
],
"Trans": [
data.get('buyInfoCount'),
data.get('sellInfoCount'),
data.get('netInfoCount'),
pd.NA,
pd.NA,
pd.NA,
pd.NA
]
}
).convert_dtypes()
self._insider_purchases = df

View File

@@ -1,15 +1,16 @@
import datetime
import json
import logging
import warnings
from collections.abc import MutableMapping
import numpy as _np
import pandas as pd
import requests
from yfinance import utils
from yfinance.data import TickerData
from yfinance.exceptions import YFNotImplementedError
from yfinance.data import YfData
from yfinance.const import quote_summary_valid_modules, _BASE_URL_
from yfinance.exceptions import YFNotImplementedError, YFinanceDataException, YFinanceException
info_retired_keys_price = {"currentPrice", "dayHigh", "dayLow", "open", "previousClose", "volume", "volume24Hr"}
info_retired_keys_price.update({"regularMarket"+s for s in ["DayHigh", "DayLow", "Open", "PreviousClose", "Price", "Volume"]})
@@ -21,11 +22,11 @@ info_retired_keys_symbol = {"symbol"}
info_retired_keys = info_retired_keys_price | info_retired_keys_exchange | info_retired_keys_marketCap | info_retired_keys_symbol
_BASIC_URL_ = "https://query2.finance.yahoo.com/v6/finance/quoteSummary"
_QUOTE_SUMMARY_URL_ = f"{_BASE_URL_}/v10/finance/quoteSummary"
class InfoDictWrapper(MutableMapping):
""" Simple wrapper around info dict, intercepting 'gets' to
""" Simple wrapper around info dict, intercepting 'gets' to
print how-to-migrate messages for specific keys. Requires
override dict API"""
@@ -67,7 +68,7 @@ class InfoDictWrapper(MutableMapping):
def __iter__(self):
return iter(self.info)
def __len__(self):
return len(self.info)
@@ -125,7 +126,7 @@ class FastInfo:
_properties += ["fifty_day_average", "two_hundred_day_average", "ten_day_average_volume", "three_month_average_volume"]
_properties += ["year_high", "year_low", "year_change"]
# Because released before fixing key case, need to officially support
# Because released before fixing key case, need to officially support
# camel-case but also secretly support snake-case
base_keys = [k for k in _properties if '_' not in k]
@@ -133,7 +134,7 @@ class FastInfo:
self._sc_to_cc_key = {k: utils.snake_case_2_camelCase(k) for k in sc_keys}
self._cc_to_sc_key = {v: k for k, v in self._sc_to_cc_key.items()}
self._public_keys = sorted(base_keys + list(self._sc_to_cc_key.values()))
self._keys = sorted(self._public_keys + sc_keys)
@@ -156,7 +157,7 @@ class FastInfo:
def __getitem__(self, k):
if not isinstance(k, str):
raise KeyError(f"key must be a string")
raise KeyError("key must be a string")
if k not in self._keys:
raise KeyError(f"'{k}' not valid key. Examine 'FastInfo.keys()'")
if k in self._cc_to_sc_key:
@@ -176,15 +177,11 @@ class FastInfo:
return self.__str__()
def toJSON(self, indent=4):
d = {k: self[k] for k in self.keys()}
return json.dumps({k: self[k] for k in self.keys()}, indent=indent)
def _get_1y_prices(self, fullDaysOnly=False):
if self._prices_1y is None:
# Temporarily disable error printing
logging.disable(logging.CRITICAL)
self._prices_1y = self._tkr.history(period="380d", auto_adjust=False, keepna=True, proxy=self.proxy)
logging.disable(logging.NOTSET)
self._md = self._tkr.get_history_metadata(proxy=self.proxy)
try:
ctp = self._md["currentTradingPeriod"]
@@ -210,18 +207,12 @@ class FastInfo:
def _get_1wk_1h_prepost_prices(self):
if self._prices_1wk_1h_prepost is None:
# Temporarily disable error printing
logging.disable(logging.CRITICAL)
self._prices_1wk_1h_prepost = self._tkr.history(period="1wk", interval="1h", auto_adjust=False, prepost=True, proxy=self.proxy)
logging.disable(logging.NOTSET)
return self._prices_1wk_1h_prepost
def _get_1wk_1h_reg_prices(self):
if self._prices_1wk_1h_reg is None:
# Temporarily disable error printing
logging.disable(logging.CRITICAL)
self._prices_1wk_1h_reg = self._tkr.history(period="1wk", interval="1h", auto_adjust=False, prepost=False, proxy=self.proxy)
logging.disable(logging.NOTSET)
return self._prices_1wk_1h_reg
def _get_exchange_metadata(self):
@@ -260,8 +251,6 @@ class FastInfo:
if self._currency is not None:
return self._currency
if self._tkr._history_metadata is None:
self._get_1y_prices()
md = self._tkr.get_history_metadata(proxy=self.proxy)
self._currency = md["currency"]
return self._currency
@@ -271,8 +260,6 @@ class FastInfo:
if self._quote_type is not None:
return self._quote_type
if self._tkr._history_metadata is None:
self._get_1y_prices()
md = self._tkr.get_history_metadata(proxy=self.proxy)
self._quote_type = md["instrumentType"]
return self._quote_type
@@ -336,7 +323,7 @@ class FastInfo:
else:
prices = prices[["Close"]].groupby(prices.index.date).last()
if prices.shape[0] < 2:
# Very few symbols have previousClose despite no
# Very few symbols have previousClose despite no
# no trading data e.g. 'QCSTIX'.
fail = True
else:
@@ -355,12 +342,12 @@ class FastInfo:
return self._reg_prev_close
prices = self._get_1y_prices()
if prices.shape[0] == 1:
# Tiny % of tickers don't return daily history before last trading day,
# Tiny % of tickers don't return daily history before last trading day,
# so backup option is hourly history:
prices = self._get_1wk_1h_reg_prices()
prices = prices[["Close"]].groupby(prices.index.date).last()
if prices.shape[0] < 2:
# Very few symbols have regularMarketPreviousClose despite no
# Very few symbols have regularMarketPreviousClose despite no
# no trading data. E.g. 'QCSTIX'.
# So fallback to original info[] if available.
self._tkr.info # trigger fetch
@@ -551,14 +538,16 @@ class FastInfo:
class Quote:
def __init__(self, data: TickerData, proxy=None):
def __init__(self, data: YfData, symbol: str, proxy=None):
self._data = data
self._symbol = symbol
self.proxy = proxy
self._info = None
self._retired_info = None
self._sustainability = None
self._recommendations = None
self._upgrades_downgrades = None
self._calendar = None
self._already_scraped = False
@@ -568,7 +557,7 @@ class Quote:
@property
def info(self) -> dict:
if self._info is None:
self._fetch(self.proxy)
self._fetch_info(self.proxy)
self._fetch_complementary(self.proxy)
return self._info
@@ -582,27 +571,75 @@ class Quote:
@property
def recommendations(self) -> pd.DataFrame:
if self._recommendations is None:
raise YFNotImplementedError('recommendations')
result = self._fetch(self.proxy, modules=['recommendationTrend'])
if result is None:
self._recommendations = pd.DataFrame()
else:
try:
data = result["quoteSummary"]["result"][0]["recommendationTrend"]["trend"]
except (KeyError, IndexError):
raise YFinanceDataException(f"Failed to parse json response from Yahoo Finance: {result}")
self._recommendations = pd.DataFrame(data)
return self._recommendations
@property
def calendar(self) -> pd.DataFrame:
def upgrades_downgrades(self) -> pd.DataFrame:
if self._upgrades_downgrades is None:
result = self._fetch(self.proxy, modules=['upgradeDowngradeHistory'])
if result is None:
self._upgrades_downgrades = pd.DataFrame()
else:
try:
data = result["quoteSummary"]["result"][0]["upgradeDowngradeHistory"]["history"]
if len(data) == 0:
raise YFinanceDataException(f"No upgrade/downgrade history found for {self._symbol}")
df = pd.DataFrame(data)
df.rename(columns={"epochGradeDate": "GradeDate", 'firm': 'Firm', 'toGrade': 'ToGrade', 'fromGrade': 'FromGrade', 'action': 'Action'}, inplace=True)
df.set_index('GradeDate', inplace=True)
df.index = pd.to_datetime(df.index, unit='s')
self._upgrades_downgrades = df
except (KeyError, IndexError):
raise YFinanceDataException(f"Failed to parse json response from Yahoo Finance: {result}")
return self._upgrades_downgrades
@property
def calendar(self) -> dict:
if self._calendar is None:
raise YFNotImplementedError('calendar')
self._fetch_calendar()
return self._calendar
def _fetch(self, proxy):
@staticmethod
def valid_modules():
return quote_summary_valid_modules
def _fetch(self, proxy, modules: list):
if not isinstance(modules, list):
raise YFinanceException("Should provide a list of modules, see available modules using `valid_modules`")
modules = ','.join([m for m in modules if m in quote_summary_valid_modules])
if len(modules) == 0:
raise YFinanceException("No valid modules provided, see available modules using `valid_modules`")
params_dict = {"modules": modules, "corsDomain": "finance.yahoo.com", "formatted": "false", "symbol": self._symbol}
try:
result = self._data.get_raw_json(_QUOTE_SUMMARY_URL_ + f"/{self._symbol}", user_agent_headers=self._data.user_agent_headers, params=params_dict, proxy=proxy)
except requests.exceptions.HTTPError as e:
utils.get_yf_logger().error(str(e))
return None
return result
def _fetch_info(self, proxy):
if self._already_fetched:
return
self._already_fetched = True
modules = ['financialData', 'quoteType', 'defaultKeyStatistics', 'assetProfile', 'summaryDetail']
params_dict = {"modules": modules, "ssl": "true"}
result = self._data.get_raw_json(
_BASIC_URL_ + f"/{self._data.ticker}", params=params_dict, proxy=proxy
)
result["quoteSummary"]["result"][0]["symbol"] = self._data.ticker
result = self._fetch(proxy, modules=modules)
if result is None:
self._info = {}
return
result["quoteSummary"]["result"][0]["symbol"] = self._symbol
query1_info = next(
(info for info in result.get("quoteSummary", {}).get("result", []) if info["symbol"] == self._data.ticker),
(info for info in result.get("quoteSummary", {}).get("result", []) if info["symbol"] == self._symbol),
None,
)
# Most keys that appear in multiple dicts have same value. Except 'maxAge' because
@@ -611,10 +648,10 @@ class Quote:
if "maxAge" in query1_info[k] and query1_info[k]["maxAge"] == 1:
query1_info[k]["maxAge"] = 86400
query1_info = {
k1: v1
for k, v in query1_info.items()
if isinstance(v, dict)
for k1, v1 in v.items()
k1: v1
for k, v in query1_info.items()
if isinstance(v, dict)
for k1, v1 in v.items()
if v1
}
# recursively format but only because of 'companyOfficers'
@@ -641,7 +678,7 @@ class Quote:
self._already_fetched_complementary = True
# self._scrape(proxy) # decrypt broken
self._fetch(proxy)
self._fetch_info(proxy)
if self._info is None:
return
@@ -670,7 +707,7 @@ class Quote:
# pass
#
# For just one/few variable is faster to query directly:
url = f"https://query1.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{self._data.ticker}?symbol={self._data.ticker}"
url = f"https://query1.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{self._symbol}?symbol={self._symbol}"
for k in keys:
url += "&type=" + k
# Request 6 months of data
@@ -682,14 +719,39 @@ class Quote:
json_str = self._data.cache_get(url=url, proxy=proxy).text
json_data = json.loads(json_str)
try:
key_stats = json_data["timeseries"]["result"][0]
if k not in key_stats:
# Yahoo website prints N/A, indicates Yahoo lacks necessary data to calculate
v = None
json_result = json_data.get("timeseries") or json_data.get("finance")
if json_result["error"] is not None:
raise YFinanceException("Failed to parse json response from Yahoo Finance: " + str(json_result["error"]))
for k in keys:
keydict = json_result["result"][0]
if k in keydict:
self._info[k] = keydict[k][-1]["reportedValue"]["raw"]
else:
# Select most recent (last) raw value in list:
v = key_stats[k][-1]["reportedValue"]["raw"]
except Exception:
v = None
self._info[k] = v
self.info[k] = None
def _fetch_calendar(self):
# secFilings return too old data, so not requesting it for now
result = self._fetch(self.proxy, modules=['calendarEvents'])
if result is None:
self._calendar = {}
return
try:
self._calendar = dict()
_events = result["quoteSummary"]["result"][0]["calendarEvents"]
if 'dividendDate' in _events:
self._calendar['Dividend Date'] = datetime.datetime.fromtimestamp(_events['dividendDate']).date()
if 'exDividendDate' in _events:
self._calendar['Ex-Dividend Date'] = datetime.datetime.fromtimestamp(_events['exDividendDate']).date()
# splits = _events.get('splitDate') # need to check later, i will add code for this if found data
earnings = _events.get('earnings')
if earnings is not None:
self._calendar['Earnings Date'] = [datetime.datetime.fromtimestamp(d).date() for d in earnings.get('earningsDate', [])]
self._calendar['Earnings High'] = earnings.get('earningsHigh', None)
self._calendar['Earnings Low'] = earnings.get('earningsLow', None)
self._calendar['Earnings Average'] = earnings.get('earningsAverage', None)
self._calendar['Revenue High'] = earnings.get('revenueHigh', None)
self._calendar['Revenue Low'] = earnings.get('revenueLow', None)
self._calendar['Revenue Average'] = earnings.get('revenueAverage', None)
except (KeyError, IndexError):
raise YFinanceDataException(f"Failed to parse json response from Yahoo Finance: {result}")

View File

@@ -27,24 +27,25 @@ from collections import namedtuple as _namedtuple
import pandas as _pd
from .base import TickerBase
from .const import _BASE_URL_
class Ticker(TickerBase):
def __init__(self, ticker, session=None):
super(Ticker, self).__init__(ticker, session=session)
def __init__(self, ticker, session=None, proxy=None):
super(Ticker, self).__init__(ticker, session=session, proxy=proxy)
self._expirations = {}
self._underlying = {}
def __repr__(self):
return f'yfinance.Ticker object <{self.ticker}>'
def _download_options(self, date=None, proxy=None):
def _download_options(self, date=None):
if date is None:
url = f"{self._base_url}/v7/finance/options/{self.ticker}"
url = f"{_BASE_URL_}/v7/finance/options/{self.ticker}"
else:
url = f"{self._base_url}/v7/finance/options/{self.ticker}?date={date}"
url = f"{_BASE_URL_}/v7/finance/options/{self.ticker}?date={date}"
r = self._data.get(url=url, proxy=proxy).json()
r = self._data.get(url=url, proxy=self.proxy).json()
if len(r.get('optionChain', {}).get('result', [])) > 0:
for exp in r['optionChain']['result'][0]['expirationDates']:
self._expirations[_datetime.datetime.utcfromtimestamp(
@@ -80,9 +81,9 @@ class Ticker(TickerBase):
data['lastTradeDate'] = data['lastTradeDate'].dt.tz_convert(tz)
return data
def option_chain(self, date=None, proxy=None, tz=None):
def option_chain(self, date=None, tz=None):
if date is None:
options = self._download_options(proxy=proxy)
options = self._download_options()
else:
if not self._expirations:
self._download_options()
@@ -91,7 +92,7 @@ class Ticker(TickerBase):
f"Expiration `{date}` cannot be found. "
f"Available expirations are: [{', '.join(self._expirations)}]")
date = self._expirations[date]
options = self._download_options(date, proxy=proxy)
options = self._download_options(date)
return _namedtuple('Options', ['calls', 'puts', 'underlying'])(**{
"calls": self._options2df(options['calls'], tz=tz),
@@ -117,12 +118,24 @@ class Ticker(TickerBase):
def mutualfund_holders(self) -> _pd.DataFrame:
return self.get_mutualfund_holders()
@property
def insider_purchases(self) -> _pd.DataFrame:
return self.get_insider_purchases()
@property
def insider_transactions(self) -> _pd.DataFrame:
return self.get_insider_transactions()
@property
def insider_roster_holders(self) -> _pd.DataFrame:
return self.get_insider_roster_holders()
@property
def dividends(self) -> _pd.Series:
return self.get_dividends()
@property
def capital_gains(self):
def capital_gains(self) -> _pd.Series:
return self.get_capital_gains()
@property
@@ -134,7 +147,7 @@ class Ticker(TickerBase):
return self.get_actions()
@property
def shares(self) -> _pd.DataFrame :
def shares(self) -> _pd.DataFrame:
return self.get_shares()
@property
@@ -146,13 +159,24 @@ class Ticker(TickerBase):
return self.get_fast_info()
@property
def calendar(self) -> _pd.DataFrame:
def calendar(self) -> dict:
"""
Returns a dictionary of events, earnings, and dividends for the ticker
"""
return self.get_calendar()
@property
def recommendations(self):
return self.get_recommendations()
@property
def recommendations_summary(self):
return self.get_recommendations_summary()
@property
def upgrades_downgrades(self):
return self.get_upgrades_downgrades()
@property
def earnings(self) -> _pd.DataFrame:
return self.get_earnings()
@@ -217,10 +241,6 @@ class Ticker(TickerBase):
def quarterly_cashflow(self) -> _pd.DataFrame:
return self.quarterly_cash_flow
@property
def recommendations_summary(self):
return self.get_recommendations_summary()
@property
def analyst_price_target(self) -> _pd.DataFrame:
return self.get_analyst_price_target()
@@ -240,7 +260,7 @@ class Ticker(TickerBase):
return tuple(self._expirations.keys())
@property
def news(self):
def news(self) -> list:
return self.get_news()
@property

View File

@@ -21,22 +21,16 @@
from __future__ import print_function
import atexit as _atexit
import datetime as _datetime
import logging
import os as _os
import re as _re
import peewee as _peewee
import sys as _sys
import threading
from functools import lru_cache
from inspect import getmembers
from threading import Lock
from types import FunctionType
from typing import Dict, Union, List, Optional
from typing import List, Optional
import appdirs as _ad
import numpy as _np
import pandas as _pd
import pytz as _tz
@@ -47,11 +41,6 @@ from pytz import UnknownTimeZoneError
from yfinance import const
from .const import _BASE_URL_
try:
import ujson as _json
except ImportError:
import json as _json
user_agent_headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
@@ -68,7 +57,7 @@ def attributes(obj):
@lru_cache(maxsize=20)
def print_once(msg):
# 'warnings' module suppression of repeat messages does not work.
# 'warnings' module suppression of repeat messages does not work.
# This function replicates correct behaviour
print(msg)
@@ -591,8 +580,8 @@ def fix_Yahoo_returning_prepost_unrequested(quotes, interval, tradingPeriods):
def fix_Yahoo_returning_live_separate(quotes, interval, tz_exchange):
# Yahoo bug fix. If market is open today then Yahoo normally returns
# todays data as a separate row from rest-of week/month interval in above row.
# Yahoo bug fix. If market is open today then Yahoo normally returns
# todays data as a separate row from rest-of week/month interval in above row.
# Seems to depend on what exchange e.g. crypto OK.
# Fix = merge them together
n = quotes.shape[0]
@@ -656,7 +645,6 @@ def safe_merge_dfs(df_main, df_sub, interval):
if df_main.empty:
return df_main
df_sub_backup = df_sub.copy()
data_cols = [c for c in df_sub.columns if c not in df_main]
if len(data_cols) > 1:
raise Exception("Expected 1 data col")
@@ -679,11 +667,18 @@ def safe_merge_dfs(df_main, df_sub, interval):
indices = _np.searchsorted(_np.append(df_main.index, df_main.index[-1] + td), df_sub.index, side='right')
indices -= 1 # Convert from [[i-1], [i]) to [[i], [i+1])
# Numpy.searchsorted does not handle out-of-range well, so handle manually:
for i in range(len(df_sub.index)):
dt = df_sub.index[i]
if dt < df_main.index[0] or dt >= df_main.index[-1] + td:
# Out-of-range
indices[i] = -1
if intraday:
for i in range(len(df_sub.index)):
dt = df_sub.index[i].date()
if dt < df_main.index[0].date() or dt >= df_main.index[-1].date() + _datetime.timedelta(days=1):
# Out-of-range
indices[i] = -1
else:
for i in range(len(df_sub.index)):
dt = df_sub.index[i]
if dt < df_main.index[0] or dt >= df_main.index[-1] + td:
# Out-of-range
indices[i] = -1
f_outOfRange = indices == -1
if f_outOfRange.any():
@@ -694,7 +689,7 @@ def safe_merge_dfs(df_main, df_sub, interval):
df_main['Dividends'] = 0.0
return df_main
else:
empty_row_data = {c:[_np.nan] for c in const.price_colnames}|{'Volume':[0]}
empty_row_data = {**{c:[_np.nan] for c in const._PRICE_COLNAMES_}, 'Volume':[0]}
if interval == '1d':
# For 1d, add all out-of-range event dates
for i in _np.where(f_outOfRange)[0]:
@@ -703,7 +698,7 @@ def safe_merge_dfs(df_main, df_sub, interval):
empty_row = _pd.DataFrame(data=empty_row_data, index=[dt])
df_main = _pd.concat([df_main, empty_row], sort=True)
else:
# Else, only add out-of-range event dates if occurring in interval
# Else, only add out-of-range event dates if occurring in interval
# immediately after last price row
last_dt = df_main.index[-1]
next_interval_start_dt = last_dt + td
@@ -711,7 +706,6 @@ def safe_merge_dfs(df_main, df_sub, interval):
for i in _np.where(f_outOfRange)[0]:
dt = df_sub.index[i]
if next_interval_start_dt <= dt < next_interval_end_dt:
new_dt = next_interval_start_dt
get_yf_logger().debug(f"Adding out-of-range {data_col} @ {dt.date()} in new prices row of NaNs")
empty_row = _pd.DataFrame(data=empty_row_data, index=[dt])
df_main = _pd.concat([df_main, empty_row], sort=True)
@@ -771,14 +765,14 @@ def safe_merge_dfs(df_main, df_sub, interval):
def fix_Yahoo_dst_issue(df, interval):
if interval in ["1d", "1w", "1wk"]:
# These intervals should start at time 00:00. But for some combinations of date and timezone,
# These intervals should start at time 00:00. But for some combinations of date and timezone,
# Yahoo has time off by few hours (e.g. Brazil 23:00 around Jan-2022). Suspect DST problem.
# The clue is (a) minutes=0 and (b) hour near 0.
# The clue is (a) minutes=0 and (b) hour near 0.
# Obviously Yahoo meant 00:00, so ensure this doesn't affect date conversion:
f_pre_midnight = (df.index.minute == 0) & (df.index.hour.isin([22, 23]))
dst_error_hours = _np.array([0] * df.shape[0])
dst_error_hours[f_pre_midnight] = 24 - df.index[f_pre_midnight].hour
df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
df.index += _pd.to_timedelta(dst_error_hours, 'h')
return df
@@ -864,9 +858,9 @@ class ProgressBar:
if self.elapsed > self.iterations:
self.elapsed = self.iterations
self.update_iteration(1)
print('\r' + str(self), end='')
_sys.stdout.flush()
print()
print('\r' + str(self), end='', file=_sys.stderr)
_sys.stderr.flush()
print("", file=_sys.stderr)
def animate(self, iteration=None):
if iteration is None:
@@ -875,8 +869,8 @@ class ProgressBar:
else:
self.elapsed += iteration
print('\r' + str(self), end='')
_sys.stdout.flush()
print('\r' + str(self), end='', file=_sys.stderr)
_sys.stderr.flush()
self.update_iteration()
def update_iteration(self, val=None):
@@ -896,156 +890,3 @@ class ProgressBar:
def __str__(self):
return str(self.prog_bar)
# ---------------------------------
# TimeZone cache related code
# ---------------------------------
_cache_init_lock = Lock()
class _TzCacheException(Exception):
pass
class _TzCacheDummy:
"""Dummy cache to use if tz cache is disabled"""
def lookup(self, tkr):
return None
def store(self, tkr, tz):
pass
@property
def tz_db(self):
return None
class _TzCacheManager:
_tz_cache = None
@classmethod
def get_tz(cls):
if cls._tz_cache is None:
cls._initialise()
return cls._tz_cache
@classmethod
def _initialise(cls, cache_dir=None):
try:
cls._tz_cache = _TzCache()
except _TzCacheException as err:
get_yf_logger().info(f"Failed to create TzCache, reason: {err}. "
"TzCache will not be used. "
"Tip: You can direct cache to use a different location with 'set_tz_cache_location(mylocation)'")
cls._tz_cache = _TzCacheDummy()
class _DBManager:
_db = None
_cache_dir = _os.path.join(_ad.user_cache_dir(), "py-yfinance")
@classmethod
def get_database(cls):
if cls._db is None:
cls._initialise()
return cls._db
@classmethod
def close_db(cls):
if cls._db is not None:
try:
cls._db.close()
except Exception as e:
# Must discard exceptions because Python trying to quit.
pass
@classmethod
def _initialise(cls, cache_dir=None):
if cache_dir is not None:
cls._cache_dir = cache_dir
if not _os.path.isdir(cls._cache_dir):
_os.mkdir(cls._cache_dir)
cls._db = _peewee.SqliteDatabase(
_os.path.join(cls._cache_dir, 'tkr-tz.db'),
pragmas={'journal_mode': 'wal', 'cache_size': -64}
)
old_cache_file_path = _os.path.join(cls._cache_dir, "tkr-tz.csv")
if _os.path.isfile(old_cache_file_path):
_os.remove(old_cache_file_path)
@classmethod
def change_location(cls, new_cache_dir):
cls._db.close()
cls._db = None
cls._cache_dir = new_cache_dir
# close DB when Python exists
_atexit.register(_DBManager.close_db)
class _KV(_peewee.Model):
key = _peewee.CharField(primary_key=True)
value = _peewee.CharField(null=True)
class Meta:
database = _DBManager.get_database()
without_rowid = True
class _TzCache:
def __init__(self):
db = _DBManager.get_database()
db.connect()
db.create_tables([_KV])
def lookup(self, key):
try:
return _KV.get(_KV.key == key).value
except _KV.DoesNotExist:
return None
def store(self, key, value):
db = _DBManager.get_database()
try:
if value is None:
q = _KV.delete().where(_KV.key == key)
q.execute()
return
with db.atomic():
_KV.insert(key=key, value=value).execute()
except _peewee.IntegrityError:
# Integrity error means the key already exists. Try updating the key.
old_value = self.lookup(key)
if old_value != value:
get_yf_logger().debug(f"Value for key {key} changed from {old_value} to {value}.")
with db.atomic():
q = _KV.update(value=value).where(_KV.key == key)
q.execute()
def get_tz_cache():
"""
Get the timezone cache, initializes it and creates cache folder if needed on first call.
If folder cannot be created for some reason it will fall back to initialize a
dummy cache with same interface as real cash.
"""
# as this can be called from multiple threads, protect it.
with _cache_init_lock:
return _TzCacheManager.get_tz()
def set_tz_cache_location(cache_dir: str):
"""
Sets the path to create the "py-yfinance" cache folder in.
Useful if the default folder returned by "appdir.user_cache_dir()" is not writable.
Must be called before cache is used (that is, before fetching tickers).
:param cache_dir: Path to use for caches
:return: None
"""
_DBManager.change_location(cache_dir)

View File

@@ -1 +1 @@
version = "0.2.30"
version = "0.2.38"