Compare commits

...

134 Commits

Author SHA1 Message Date
ValueRaider
b67372e4eb Version 0.2.0rc2 2022-11-12 21:28:22 +00:00
ValueRaider
77107c6ea0 Merge pull request #1168 from ranaroussi/dev
Merge dev -> main for release 0.2.0rc2
2022-11-12 21:20:34 +00:00
ValueRaider
2a0e14962e Merge pull request #1157 from fredrik-corneliusson/fix-fundamentals-regression-bug
Fix fundamentals regression bug
2022-11-11 11:51:50 +00:00
ValueRaider
43aae83a1b Merge pull request #1161 from ranaroussi/fix/events-merge
Fix merging events with day/wk/mth prices
2022-11-10 21:50:56 +00:00
Fredrik Corneliusson
fff8e9145d Fixed #1160 2022-11-10 19:17:18 +01:00
ValueRaider
4f1e7a49c4 Fix merging events with day/wk/mth prices 2022-11-10 14:15:18 +00:00
Fredrik Corneliusson
357da735ea Fix fundamentals regression bug
The wrong data was returned for balance_sheet and cashflow
2022-11-10 01:51:15 +01:00
ValueRaider
b7b3b4975d Merge pull request #1148 from ranaroussi/feature/improve-repair-zero
Repair: add zero-price repair + refactor
2022-11-08 15:13:28 +00:00
ValueRaider
7d5fdb6f83 Merge branch 'dev' into feature/improve-repair-zero 2022-11-08 15:10:41 +00:00
ValueRaider
1c2ed86313 Repair: implement _fix_zero_prices(), refactor _fix_unit_mixups(), improve ratio calc 2022-11-08 15:04:59 +00:00
ValueRaider
23e8423b8b Merge pull request #1147 from fredrik-corneliusson/request_optimization
Request optimization
2022-11-08 14:44:55 +00:00
Fredrik Corneliusson
c7cf4378f6 Lowered lru_cache size and made cache_info and cache_clear work on lru_cached methods. 2022-11-08 01:36:28 +01:00
Fredrik Corneliusson
5bfbec5df0 Decreased default cache_maxsize for lru_cache after some investigation of memory usage. Also fixed warning about wrong type used for dataframe index. 2022-11-07 10:29:42 +01:00
Fredrik Corneliusson
a775669ac5 Tried to clean up the temp folder creation in test. 2022-11-07 00:00:55 +01:00
Fredrik Corneliusson
f96dfc25c2 Merge branch 'dev' into request_optimization
# Conflicts:
#	tests/ticker.py
#	yfinance/base.py
2022-11-06 23:31:14 +01:00
fredrik-corneliusson
f6c311815d Merge branch 'ranaroussi:main' into request_optimization 2022-11-06 22:47:57 +01:00
Fredrik Corneliusson
112fd5cf64 Added more tests for Ticker and missing dependencies. 2022-11-06 20:49:36 +01:00
Fredrik Corneliusson
2be718700f Fixed regression bug with balance_sheet and added test for it. 2022-11-06 20:16:10 +01:00
ValueRaider
080d33f597 Fix typo in #1140 2022-11-06 19:00:28 +00:00
ValueRaider
c248b422da Merge pull request #1140 from ranaroussi/fix/bad-ticker-handling
Improve bad ticker handling ; Remove redundant get_earnings_history()
2022-11-06 18:47:11 +00:00
ValueRaider
b050692ee4 Improve bad ticker handling ; Remove redundant get_earnings_history() 2022-11-06 18:30:05 +00:00
Fredrik Corneliusson
2fed55a0d1 Improved TestTickerHolders test. 2022-11-06 19:14:51 +01:00
Fredrik Corneliusson
438f512f47 Cleaned up .gitignore 2022-11-06 17:01:09 +01:00
Fredrik Corneliusson
157b45269d Fixed regression in PR and cleaned up .gitignore 2022-11-06 16:59:06 +01:00
ValueRaider
9b169e60fb Merge pull request #1143 from ranaroussi/fix/dst-bugfix
Fix the fixes for Yahoo data issues (DST, weekly-2-rows) + tests
2022-11-06 13:48:53 +00:00
ValueRaider
1b439c4af6 Fix the fixes for Yahoo data issues (DST, weekly-2-rows) + tests 2022-11-06 13:47:34 +00:00
Fredrik Corneliusson
2cc3cbb1e8 Removed extra requests logging used for debugging. 2022-11-06 14:06:39 +01:00
Fredrik Corneliusson
743f3acb87 Removed unused get_html method. 2022-11-06 13:55:44 +01:00
Fredrik Corneliusson
9f9f7b00d8 Revert to require a pandas version that supports 3.6.1 2022-11-06 13:50:21 +01:00
Fredrik Corneliusson
07e19f187a Dropped Python versions before 3.6 from package support metadata 2022-11-06 13:34:58 +01:00
ValueRaider
45169d9ff4 Merge pull request #1144 from fredrik-corneliusson/dev
Fixed some of the formatting errors reported by PyCharm as well as simplified some code constructs.
2022-11-06 12:27:10 +00:00
Fredrik Corneliusson
05520ee108 Have one place to retrieve data in order to ease caching and speed up operations and reduce code duplication. Needs Python 3.6 2022-11-06 13:26:52 +01:00
Fredrik Corneliusson
e1dec42950 Fix regression in PR #1144 2022-11-06 13:23:44 +01:00
Fredrik Corneliusson
369cbc41e5 Merge remote-tracking branch 'origin/dev' into dev
# Conflicts:
#	yfinance/base.py
2022-11-05 13:46:41 +01:00
ValueRaider
4d4c5c1819 Merge pull request #1138 from ranaroussi/patch/peg-ratio-trailing
Move get 'trailingPegRatio' into _get_info(), simplify & optimise
2022-11-02 15:19:43 +00:00
ValueRaider
028334de92 Merge pull request #1058 from PlanetNamekTech/patch-1
Update README.md with new notation
2022-11-02 15:16:55 +00:00
ValueRaider
2b1d5c848c Move get 'trailingPegRatio' into _get_info(), simplify & optimise 2022-10-31 23:51:40 +00:00
Fredrik Corneliusson
fe5a9d70e5 Removed unnecessary escaping in regexp. Formatted code and updated if statements to make sure variables used always is defined. 2022-10-29 16:43:45 +02:00
Fredrik Corneliusson
e89fe6357b Simplified the code and changed **kwargs to normal keyword arguments. 2022-10-29 13:32:32 +02:00
ValueRaider
bcd6e5b11d Merge pull request #1128 from ranaroussi/fix/financials-data
Fix financials tables
2022-10-28 14:30:41 +01:00
ValueRaider
e83cc74800 Merge branch 'dev' into fix/financials-data 2022-10-28 14:26:07 +01:00
ValueRaider
9e529f3c8f Revert version bump 2022-10-28 14:18:13 +01:00
ValueRaider
e29df56253 Financials - reorder rows to match website, disable MultiIndex 2022-10-28 14:16:54 +01:00
Fredrik Corneliusson
f6a0979916 Simplified the code and changed **kwargs to normal keyword arguments. 2022-10-28 02:19:59 +02:00
ValueRaider
e3d2c5d6d7 Merge pull request #1064 from Jossan84/main
Bugfix: Get logo url when no website exists
2022-10-27 22:29:12 +01:00
Fredrik Corneliusson
a836f24144 Fixed some of the formatting errors reported by PyCharm IDE 2022-10-27 00:32:58 +02:00
ValueRaider
fb5c67b3bd Bump version to 0.2.0rc1 - big update 2022-10-26 22:39:41 +01:00
ValueRaider
3f33aa0377 Merge pull request #1119 from ranaroussi/dev
Improve error handling
2022-10-26 16:23:59 +01:00
ValueRaider
ecdc36ab8e Merge pull request #1118 from fredrik-corneliusson/dev
Better handling of error from yahoo API, added missing pytz dependency and fixed if statement syntax warnings
2022-10-25 21:47:43 +01:00
Fredrik Corneliusson
fbc5de153a Handle error from yahoo api 2022-10-25 21:52:09 +02:00
Fredrik Corneliusson
e4a228b830 Some fixes and better debug if failing to fetch timezone from ticker. 2022-10-25 21:36:42 +02:00
Fredrik Corneliusson
3cee66dea7 Some fixes and better debug if failing to fetch timezone from ticker. 2022-10-25 21:22:45 +02:00
ValueRaider
bec5b38189 Merge pull request #1117 from ranaroussi/dev
Merge all dev updates into main
2022-10-25 18:13:13 +01:00
ValueRaider
f5973b2c89 Merge branch 'main' into dev 2022-10-25 17:49:02 +01:00
ValueRaider
edb911b913 Pre-emptive ambiguous DST fix 2022-10-25 17:42:44 +01:00
ValueRaider
6117b0a042 Fix syntax error 2022-10-25 16:56:32 +01:00
ValueRaider
5cb5484a9a Fix tests.ticker to use new cache API 2022-10-25 14:16:14 +01:00
ValueRaider
4e33ddf615 Merge pull request #1113 from fredrik-corneliusson/dev
Fix cache error on read only system #1108
2022-10-25 14:14:08 +01:00
ValueRaider
6d87f3d689 Fix PR merge 2022-10-25 14:10:23 +01:00
ValueRaider
b30b97fa36 Merge pull request #1116 from ranaroussi/fix/outlier-repair-bugfixes
Fix price repair ; Improve repair test
2022-10-25 14:05:20 +01:00
ValueRaider
6253e1d8a0 Merge pull request #1112 from ranaroussi/fix/get-tz-performance
Improve performance of fetching Ticker timezone
2022-10-25 14:03:04 +01:00
ValueRaider
2dce6a705c Remove debug code 2022-10-25 14:01:44 +01:00
ValueRaider
df11fcdb37 Improve Ticker._fetch_ticker_tz() ; Change timeout default to 10 2022-10-25 13:59:51 +01:00
fredrik-corneliusson
567e2cf0d3 Merge branch 'ranaroussi:dev' into dev 2022-10-25 01:00:47 +02:00
Fredrik Corneliusson
3d6e88857b Merge remote-tracking branch 'origin/dev' into dev 2022-10-25 01:00:13 +02:00
Fredrik Corneliusson
59af19d84c Fix cache error on read only system #1108 2022-10-25 00:59:05 +02:00
ValueRaider
e07191b627 Fix price repair ; Improve repair test 2022-10-24 23:55:16 +01:00
ValueRaider
2623ba967d Simplify Ticker._fetch_ticker_tz() - 2 2022-10-24 13:46:39 +01:00
ValueRaider
fe1c705e24 Simplify Ticker._fetch_ticker_tz() 2022-10-24 13:45:25 +01:00
ValueRaider
9315f7b61d Add Ticker._fetch_ticker_tz() for faster tz fetch 2022-10-24 13:34:57 +01:00
ValueRaider
f76c788881 Remove debug print 2022-10-24 11:21:56 +01:00
ValueRaider
561f56c9f9 Merge pull request #1110 from ranaroussi/feature/outlier-repair
Feature - repair 100x price errors
2022-10-24 00:16:25 +01:00
ValueRaider
cf795ea0c7 Merge pull request #1109 from fredrik-corneliusson/dev
Fix for #1076
2022-10-24 00:03:40 +01:00
ValueRaider
643536b53b Fix '_fix_unit_mixups()' when data missing split-adjustment 2022-10-23 23:46:33 +01:00
fredrik-corneliusson
ae8a5ff996 Merge branch 'ranaroussi:dev' into dev 2022-10-23 23:12:21 +02:00
Fredrik Corneliusson
d01d378c8d Small cleanup to ease finding bug #1076. Begun by getting rid of multiple calls to self.info (get_info). 2022-10-23 22:37:07 +02:00
ValueRaider
9e0152aae4 Merge pull request #1105 from fredrik-corneliusson/dev
Fix and improve timezone cache concurrency
2022-10-23 16:43:53 +01:00
Fredrik Corneliusson
6c21c1994e Fix bug, create cache directory if it does not exists. 2022-10-23 15:27:41 +02:00
Fredrik Corneliusson
d24a25f579 Add missing typehint 2022-10-23 13:59:48 +02:00
Fredrik Corneliusson
422a50672d Lazy init of cache db and added migration of data from old CSV cache. 2022-10-23 13:43:40 +02:00
ValueRaider
6e09410c7d Improve repair feedback msg 2022-10-23 00:03:23 +01:00
ValueRaider
3c51687351 Add arg history(repair=False) to fix $/cents £/p mixups 2022-10-22 23:58:20 +01:00
Fredrik Corneliusson
783df54978 Bugfix, do not set tz in cache if it is None, just delete it. 2022-10-22 23:56:50 +02:00
Fredrik Corneliusson
c76bf0128f Improve timezone cache to make it more reliable when using threads by using SQLLite. 2022-10-22 23:30:48 +02:00
ValueRaider
33f57ac002 Merge pull request #1104 from ranaroussi/feature/improve-err-msgs
Improve error message handling
2022-10-22 16:30:47 +01:00
ValueRaider
c0e1536179 Improve error message handling
Add error check for 'period' ; simplify err-msg handling ; new arg 'raise_errors' to control print-vs-Exception
2022-10-21 23:36:37 +01:00
ValueRaider
303e0ea655 Merge pull request #1102 from ranaroussi/fix/price-tz-and-events
Various fixes to price data
2022-10-21 22:19:11 +01:00
ValueRaider
40424b71a6 Fix test 'test_intraDayWithEvents' 2022-10-21 17:26:15 +01:00
ValueRaider
b018f917a9 Port in: 'Fix when Yahoo returns price=NaNs on dividend day' 2022-10-21 17:21:19 +01:00
ValueRaider
28e50946ca Fix Ticker.dividends property 2022-10-21 15:44:36 +01:00
ValueRaider
841b485b1d Drop out-of-date-range events 2022-10-21 15:37:51 +01:00
ValueRaider
e842a9d657 Event-merge fixes: intra-day, weely, lost tz, 'test_intraDayWithEvents' 2022-10-21 15:26:59 +01:00
ValueRaider
0f14728591 Add test 'test_tz_dst_ambiguous' 2022-10-21 15:26:45 +01:00
ValueRaider
69dfe325ae Add tz to daily price data 2022-10-21 12:54:48 +01:00
ValueRaider
f20aa9a875 Merge pull request #1099 from ranaroussi/feature/improve-tz-cache
Improve timezone cache
2022-10-21 10:31:01 +01:00
ValueRaider
5707c1aa65 Merge branch 'fix/download-timezones' into dev 2022-10-21 10:16:27 +01:00
ValueRaider
053e0b9abb Port in @git-shogg fix, & fix typos 2022-10-20 22:24:24 +01:00
ValueRaider
730afda4a7 Fix financials placeholders 2022-10-20 22:19:08 +01:00
ValueRaider
1e7f4a9a91 Strengthen tz-cache against bad/corrupt values - more 2022-10-20 22:09:37 +01:00
ValueRaider
37c36549e4 Add mutex to tz-cache update 2022-10-20 22:01:08 +01:00
ValueRaider
bda339b170 Strengthen tz-cache against bad/corrupt values 2022-10-20 21:59:20 +01:00
ValueRaider
f5995161ed Optimise TZ cache indexing 2022-10-20 21:54:58 +01:00
ValueRaider
6e96a1a8e6 Refactor properly ; Rename some new properties 2022-10-17 16:40:12 +01:00
ValueRaider
68b8671cea Merge pull request #776 from git-shogg/main
Enhanced the detail of the annual financial statements and added ability to check the "Analysis" url.
2022-10-17 12:59:51 +01:00
Stephen Hogg
3b8114c135 Functions to minimize get_fundamentals. _DEV tests removed. 2022-10-17 21:02:58 +10:00
Stephen Hogg
d65391b798 Merged (refactored). Quarterlies updates. Multi-Index Inlcluded. 2022-10-16 17:34:23 +10:00
Stephen Hogg
6c4da51519 Remediated missing annual table line items flagged by @ValueRaider. 2022-10-15 11:22:39 +10:00
ValueRaider
4734e92090 Merge pull request #1070 from ranaroussi/fix/weekly-prices
Fix weekly/monthly prices across 2 rows
2022-10-14 23:18:59 +01:00
ValueRaider
5fdf2463e9 Merge branch 'dev' into fix/weekly-prices 2022-10-14 23:18:15 +01:00
ValueRaider
c679551faa Add unittest for duplication fix 2022-10-14 23:15:13 +01:00
ValueRaider
fdf52ac360 Merge pull request #1086 from ranaroussi/fix/events-merge
Fix merging pre-market events with min/hour prices
2022-10-14 14:08:35 +01:00
ValueRaider
94ad0bd955 Fix merging pre-market events with min/hour prices 2022-10-12 22:41:10 +01:00
ValueRaider
51c0ea0050 Enhance recent unittest 2022-10-10 15:37:55 +01:00
ValueRaider
3401d4dbe7 Merge pull request #1069 from ranaroussi/fix/events-merge
Fix merging of dividends/splits with prices
2022-10-10 14:01:31 +01:00
ValueRaider
a724585552 Tidy syntax 2022-10-10 14:00:10 +01:00
ValueRaider
1c85433cc0 Add unittest for div/splits merging 2022-10-10 13:58:17 +01:00
ValueRaider
34e1b2f157 Add new time-series functions and compare against new scraping 2022-10-09 17:20:07 +01:00
Value Raider
c80bfc0417 Manually merge pull request #776 - Fix & enhance annual financials 2022-10-08 20:31:42 +01:00
ValueRaider
5c0b2bbaa3 Fix weekly/monthly prices across 2 rows 2022-10-02 18:26:05 +01:00
ValueRaider
7d45a6709a Fix merging of dividends/splits with prices 2022-10-02 18:20:11 +01:00
Jose Manuel
42e5751705 Bugfix: Get logo url when no website exists 2022-09-19 13:54:56 +02:00
PlanetNamekTech
2ff2c57dcf Update README.md with new notation
Multiple ticker objects section doesn't seem to work with dot notation when accessing ticker.
2022-09-09 18:38:24 -07:00
Stephen Hogg
3f23c067f9 Updated to ensure .info is brought in correctly. 2021-08-08 13:46:53 +10:00
Stephen Hogg
7a395c37e9 Updated base with some exceptions and utils. 2021-07-18 20:51:33 +10:00
Stephen Hogg
e7c55bbdec Updated to resolve Travis CI Build Fail. 2021-07-18 12:37:15 +10:00
Stephen Hogg
87dc9fb345 Potential issues flagged by CodeFactor updated. Believe that the warnings with regards to "statement seems to have no effect" is because the import on this python file is yfinance (master version 1.63). 2021-07-18 12:16:42 +10:00
Stephen Hogg
81a0a4e665 Updated to align with the yfinance upstream main. 2021-07-18 11:02:41 +10:00
Stephen Hogg
4774485477 Merge https://github.com/ranaroussi/yfinance into main 2021-07-18 10:56:45 +10:00
Stephen Hogg
c3da55f2e4 Finished updating all of the required updates. 2021-07-12 19:34:18 +10:00
Stephen Hogg
a1769e4fe1 Added self._income_statement, once happy this new dataframe should replace self._financials. Further work required to understand if there is the opportunity to also do this for balance sheet and cf statement. 2021-07-10 10:48:15 +10:00
Stephen Hogg
cb31036153 Updated to include analyst price target forecasts. 2021-07-08 17:11:10 +10:00
Stephen Hogg
da2672f338 Updated README to include additional functions. 2021-07-08 16:47:47 +10:00
Stephen Hogg
91f4891475 Added some functions to enable us to grab key details from the analysis section of yahoo finance: current_recommendations, revenue_forecasts and earnings_forecasts. 2021-07-07 20:33:35 +10:00
Stephen Hogg
883b7f0775 Updated the utils get_json function to standardize the return. This will enable us to see other store types from the base.py file. 2021-07-07 18:50:10 +10:00
17 changed files with 2181 additions and 433 deletions

7
.gitignore vendored
View File

@@ -9,3 +9,10 @@ build/
*.html
*.css
*.png
# Environments
.env
.venv
env/
venv/
ENV/

View File

@@ -1,6 +1,30 @@
Change Log
===========
0.2.0rc2
--------
Financials
- fix financials tables to match website #1128 #1157
- lru_cache to optimise web requests #1147
Prices
- improve price repair #1148
- fix merging dividends/splits with day/week/monthly prices #1161
- fix the Yahoo DST fixes #1143
- improve bad/delisted ticker handling #1140
Misc
- fix 'trailingPegRatio' #1138
- improve error handling #1118
0.2.0rc1
--------
Jumping to 0.2 for this big update. 0.1.* will continue to receive bug-fixes
- timezone cache performance massively improved. Thanks @fredrik-corneliusson #1113 #1112 #1109 #1105 #1099
- price repair feature #1110
- fix merging of dividends/splits with prices #1069 #1086 #1102
- fix Yahoo returning latest price interval across 2 rows #1070
- optional: raise errors as exceptions: raise_errors=True #1104
- add proper unit tests #1069
0.1.81
------
- Fix unhandled tz-cache exception #1107

View File

@@ -68,9 +68,20 @@ msft.dividends
# show splits
msft.splits
# show financials
msft.financials
msft.quarterly_financials
# show share count
msft.shares
# show income statement
msft.income_stmt
msft.quarterly_income_stmt
# show balance sheet
msft.balance_sheet
msft.quarterly_balance_sheet
# show cash flow statement
msft.cashflow
msft.quarterly_cashflow
# show major holders
msft.major_holders
@@ -78,13 +89,8 @@ msft.major_holders
# show institutional holders
msft.institutional_holders
# show balance sheet
msft.balance_sheet
msft.quarterly_balance_sheet
# show cashflow
msft.cashflow
msft.quarterly_cashflow
# show mutualfund holders
msft.mutualfund_holders
# show earnings
msft.earnings
@@ -95,6 +101,12 @@ msft.sustainability
# show analysts recommendations
msft.recommendations
msft.recommendations_summary
# show analysts other work
msft.analyst_price_target
msft.revenue_forecasts
msft.earnings_forecasts
msft.earnings_trend
# show next event (earnings, etc)
msft.calendar
@@ -156,9 +168,9 @@ tickers = yf.Tickers('msft aapl goog')
# ^ returns a named tuple of Ticker objects
# access each ticker using (example)
tickers.tickers.MSFT.info
tickers.tickers.AAPL.history(period="1mo")
tickers.tickers.GOOG.actions
tickers.tickers['MSFT'].info
tickers.tickers['AAPL'].history(period="1mo")
tickers.tickers['GOOG'].actions
```
### Fetching data for multiple tickers
@@ -198,6 +210,9 @@ data = yf.download( # or pdr.get_data_yahoo(...
# (optional, default is False)
auto_adjust = True,
# identify and attempt repair of currency unit mixups e.g. $/cents
repair = False,
# download pre/post regular market hours data
# (optional, default is False)
prepost = True,

View File

@@ -4,3 +4,7 @@ requests>=2.26
multitasking>=0.0.7
lxml>=4.5.1
appdirs>=1.4.4
pytz>=2022.5
frozendict>=2.3.4
beautifulsoup4>=4.11.1
html5lib>=1.1

View File

@@ -38,8 +38,8 @@ setup(
classifiers=[
'License :: OSI Approved :: Apache Software License',
# 'Development Status :: 3 - Alpha',
# 'Development Status :: 4 - Beta',
'Development Status :: 5 - Production/Stable',
'Development Status :: 4 - Beta',
#'Development Status :: 5 - Production/Stable',
'Operating System :: OS Independent',
@@ -50,20 +50,20 @@ setup(
'Topic :: Software Development :: Libraries',
'Topic :: Software Development :: Libraries :: Python Modules',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
# 'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
],
platforms=['any'],
keywords='pandas, yahoo finance, pandas datareader',
packages=find_packages(exclude=['contrib', 'docs', 'tests', 'examples']),
install_requires=['pandas>=0.24.0', 'numpy>=1.15',
install_requires=['pandas>=1.1.0', 'numpy>=1.15',
'requests>=2.26', 'multitasking>=0.0.7',
'lxml>=4.5.1', 'appdirs>=1.4.4'],
'lxml>=4.5.1', 'appdirs>=1.4.4', 'pytz>=2022.5',
'frozendict>=2.3.4',
'beautifulsoup4>=4.11.1', 'html5lib>=1.1'],
entry_points={
'console_scripts': [
'sample=sample:main',

View File

@@ -37,23 +37,27 @@ class TestTicker(unittest.TestCase):
ticker.dividends
ticker.splits
ticker.actions
ticker.shares
ticker.info
ticker.calendar
ticker.recommendations
ticker.earnings
ticker.quarterly_earnings
ticker.financials
ticker.quarterly_financials
ticker.income_stmt
ticker.quarterly_income_stmt
ticker.balance_sheet
ticker.quarterly_balance_sheet
ticker.cashflow
ticker.quarterly_cashflow
ticker.recommendations_summary
ticker.analyst_price_target
ticker.revenue_forecasts
ticker.sustainability
ticker.options
ticker.news
ticker.shares
ticker.earnings_history
ticker.earnings_trend
ticker.earnings_dates
ticker.earnings_forecasts
def test_holders(self):
for ticker in tickers:

1
tests/__init__.py Normal file
View File

@@ -0,0 +1 @@
#!/usr/bin/env python

9
tests/context.py Normal file
View File

@@ -0,0 +1,9 @@
# -*- coding: utf-8 -*-
import sys
import os
_parent_dp = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
_src_dp = _parent_dp
sys.path.insert(0, _src_dp)
import yfinance

449
tests/prices.py Normal file
View File

@@ -0,0 +1,449 @@
from .context import yfinance as yf
import unittest
import datetime as _dt
import pytz as _tz
import numpy as _np
import pandas as _pd
import os
# Create temp session
import requests_cache, tempfile
td = tempfile.TemporaryDirectory()
class TestPriceHistory(unittest.TestCase):
def setUp(self):
global td
self.td = td
self.session = requests_cache.CachedSession(os.path.join(self.td.name, "yfinance.cache"))
def tearDown(self):
self.session.close()
def test_daily_index(self):
tkrs = ["BHP.AX", "IMP.JO", "BP.L", "PNL.L", "INTC"]
intervals = ["1d", "1wk", "1mo"]
for tkr in tkrs:
dat = yf.Ticker(tkr, session=self.session)
for interval in intervals:
df = dat.history(period="5y", interval=interval)
f = df.index.time == _dt.time(0)
self.assertTrue(f.all())
def test_duplicatingDaily(self):
tkrs = ["IMP.JO", "BHG.JO", "SSW.JO", "BP.L", "INTC"]
test_run = False
for tkr in tkrs:
dat = yf.Ticker(tkr, session=self.session)
tz = dat._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
dt_utc = _tz.timezone("UTC").localize(_dt.datetime.utcnow())
dt = dt_utc.astimezone(_tz.timezone(tz))
if dt.time() < _dt.time(17, 0):
continue
test_run = True
df = dat.history(start=dt.date() - _dt.timedelta(days=7), interval="1d")
dt0 = df.index[-2]
dt1 = df.index[-1]
try:
self.assertNotEqual(dt0, dt1)
except:
print("Ticker = ", tkr)
raise
if not test_run:
self.skipTest("Skipping test_duplicatingDaily() because only expected to fail just after market close")
def test_duplicatingWeekly(self):
tkrs = ['MSFT', 'IWO', 'VFINX', '^GSPC', 'BTC-USD']
test_run = False
for tkr in tkrs:
dat = yf.Ticker(tkr, session=self.session)
tz = dat._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
dt = _tz.timezone(tz).localize(_dt.datetime.now())
if dt.date().weekday() not in [1, 2, 3, 4]:
continue
test_run = True
df = dat.history(start=dt.date() - _dt.timedelta(days=7), interval="1wk")
dt0 = df.index[-2]
dt1 = df.index[-1]
try:
self.assertNotEqual(dt0.week, dt1.week)
except:
print("Ticker={}: Last two rows within same week:".format(tkr))
print(df.iloc[df.shape[0] - 2:])
raise
if not test_run:
self.skipTest("Skipping test_duplicatingWeekly() because not possible to fail Monday/weekend")
def test_intraDayWithEvents(self):
# TASE dividend release pre-market, doesn't merge nicely with intra-day data so check still present
tkr = "ICL.TA"
# tkr = "ESLT.TA"
# tkr = "ONE.TA"
# tkr = "MGDL.TA"
start_d = _dt.date.today() - _dt.timedelta(days=60)
end_d = None
df_daily = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1d", actions=True)
df_daily_divs = df_daily["Dividends"][df_daily["Dividends"] != 0]
if df_daily_divs.shape[0] == 0:
self.skipTest("Skipping test_intraDayWithEvents() because 'ICL.TA' has no dividend in last 60 days")
last_div_date = df_daily_divs.index[-1]
start_d = last_div_date.date()
end_d = last_div_date.date() + _dt.timedelta(days=1)
df = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="15m", actions=True)
self.assertTrue((df["Dividends"] != 0.0).any())
def test_dailyWithEvents(self):
# Reproduce issue #521
tkr1 = "QQQ"
tkr2 = "GDX"
start_d = "2014-12-29"
end_d = "2020-11-29"
df1 = yf.Ticker(tkr1).history(start=start_d, end=end_d, interval="1d", actions=True)
df2 = yf.Ticker(tkr2).history(start=start_d, end=end_d, interval="1d", actions=True)
self.assertTrue(((df1["Dividends"] > 0) | (df1["Stock Splits"] > 0)).any())
self.assertTrue(((df2["Dividends"] > 0) | (df2["Stock Splits"] > 0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
missing_from_df1 = df2.index.difference(df1.index)
missing_from_df2 = df1.index.difference(df2.index)
print("{} missing these dates: {}".format(tkr1, missing_from_df1))
print("{} missing these dates: {}".format(tkr2, missing_from_df2))
raise
# Test that index same with and without events:
tkrs = [tkr1, tkr2]
for tkr in tkrs:
df1 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1d", actions=True)
df2 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1d", actions=False)
self.assertTrue(((df1["Dividends"] > 0) | (df1["Stock Splits"] > 0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
missing_from_df1 = df2.index.difference(df1.index)
missing_from_df2 = df1.index.difference(df2.index)
print("{}-with-events missing these dates: {}".format(tkr, missing_from_df1))
print("{}-without-events missing these dates: {}".format(tkr, missing_from_df2))
raise
def test_weeklyWithEvents(self):
# Reproduce issue #521
tkr1 = "QQQ"
tkr2 = "GDX"
start_d = "2014-12-29"
end_d = "2020-11-29"
df1 = yf.Ticker(tkr1).history(start=start_d, end=end_d, interval="1wk", actions=True)
df2 = yf.Ticker(tkr2).history(start=start_d, end=end_d, interval="1wk", actions=True)
self.assertTrue(((df1["Dividends"] > 0) | (df1["Stock Splits"] > 0)).any())
self.assertTrue(((df2["Dividends"] > 0) | (df2["Stock Splits"] > 0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
missing_from_df1 = df2.index.difference(df1.index)
missing_from_df2 = df1.index.difference(df2.index)
print("{} missing these dates: {}".format(tkr1, missing_from_df1))
print("{} missing these dates: {}".format(tkr2, missing_from_df2))
raise
# Test that index same with and without events:
tkrs = [tkr1, tkr2]
for tkr in tkrs:
df1 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1wk", actions=True)
df2 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1wk", actions=False)
self.assertTrue(((df1["Dividends"] > 0) | (df1["Stock Splits"] > 0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
missing_from_df1 = df2.index.difference(df1.index)
missing_from_df2 = df1.index.difference(df2.index)
print("{}-with-events missing these dates: {}".format(tkr, missing_from_df1))
print("{}-without-events missing these dates: {}".format(tkr, missing_from_df2))
raise
def test_monthlyWithEvents(self):
tkr1 = "QQQ"
tkr2 = "GDX"
start_d = "2014-12-29"
end_d = "2020-11-29"
df1 = yf.Ticker(tkr1).history(start=start_d, end=end_d, interval="1mo", actions=True)
df2 = yf.Ticker(tkr2).history(start=start_d, end=end_d, interval="1mo", actions=True)
self.assertTrue(((df1["Dividends"] > 0) | (df1["Stock Splits"] > 0)).any())
self.assertTrue(((df2["Dividends"] > 0) | (df2["Stock Splits"] > 0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
missing_from_df1 = df2.index.difference(df1.index)
missing_from_df2 = df1.index.difference(df2.index)
print("{} missing these dates: {}".format(tkr1, missing_from_df1))
print("{} missing these dates: {}".format(tkr2, missing_from_df2))
raise
# Test that index same with and without events:
tkrs = [tkr1, tkr2]
for tkr in tkrs:
df1 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1mo", actions=True)
df2 = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1mo", actions=False)
self.assertTrue(((df1["Dividends"] > 0) | (df1["Stock Splits"] > 0)).any())
try:
self.assertTrue(df1.index.equals(df2.index))
except:
missing_from_df1 = df2.index.difference(df1.index)
missing_from_df2 = df1.index.difference(df2.index)
print("{}-with-events missing these dates: {}".format(tkr, missing_from_df1))
print("{}-without-events missing these dates: {}".format(tkr, missing_from_df2))
raise
def test_tz_dst_ambiguous(self):
# Reproduce issue #1100
try:
yf.Ticker("ESLT.TA", session=self.session).history(start="2002-10-06", end="2002-10-09", interval="1d")
except _tz.exceptions.AmbiguousTimeError:
raise Exception("Ambiguous DST issue not resolved")
def test_dst_fix(self):
# Daily intervals should start at time 00:00. But for some combinations of date and timezone,
# Yahoo has time off by few hours (e.g. Brazil 23:00 around Jan-2022). Suspect DST problem.
# The clue is (a) minutes=0 and (b) hour near 0.
# Obviously Yahoo meant 00:00, so ensure this doesn't affect date conversion.
# The correction is successful if no days are weekend, and weekly data begins Monday
tkr = "AGRO3.SA"
dat = yf.Ticker(tkr, session=self.session)
start = "2021-01-11"
end = "2022-11-05"
interval = "1d"
df = dat.history(start=start, end=end, interval=interval)
self.assertTrue(((df.index.weekday >= 0) & (df.index.weekday <= 4)).all())
interval = "1wk"
df = dat.history(start=start, end=end, interval=interval)
try:
self.assertTrue((df.index.weekday == 0).all())
except:
print("Weekly data not aligned to Monday")
raise
def test_weekly_2rows_fix(self):
tkr = "AMZN"
start = _dt.date.today() - _dt.timedelta(days=14)
start -= _dt.timedelta(days=start.weekday())
dat = yf.Ticker(tkr)
df = dat.history(start=start, interval="1wk")
self.assertTrue((df.index.weekday == 0).all())
def test_repair_weekly_100x(self):
# Sometimes, Yahoo returns prices 100x the correct value.
# Suspect mixup between £/pence or $/cents etc.
# E.g. ticker PNL.L
# Setup:
tkr = "PNL.L"
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.info["exchangeTimezoneName"]
data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
df = _pd.DataFrame(data={"Open": [470.5, 473.5, 474.5, 470],
"High": [476, 476.5, 477, 480],
"Low": [470.5, 470, 465.5, 468.26],
"Close": [475, 473.5, 472, 473.5],
"Adj Close": [475, 473.5, 472, 473.5],
"Volume": [2295613, 2245604, 3000287, 2635611]},
index=_pd.to_datetime([_dt.date(2022, 10, 23),
_dt.date(2022, 10, 16),
_dt.date(2022, 10, 9),
_dt.date(2022, 10, 2)]))
df.index.name = "Date"
df_bad = df.copy()
df_bad.loc["2022-10-23", "Close"] *= 100
df_bad.loc["2022-10-16", "Low"] *= 100
df_bad.loc["2022-10-2", "Open"] *= 100
df.index = df.index.tz_localize(tz_exchange)
df_bad.index = df_bad.index.tz_localize(tz_exchange)
# Run test
df_repaired = dat._fix_unit_mixups(df_bad, "1wk", tz_exchange)
# First test - no errors left
for c in data_cols:
self.assertTrue(_np.isclose(df_repaired[c], df[c], rtol=1e-2).all())
# Second test - all differences should be either ~1x or ~100x
ratio = df_bad[data_cols].values / df[data_cols].values
ratio = ratio.round(2)
# - round near-100 ratio to 100:
f = ratio > 90
ratio[f] = (ratio[f] / 10).round().astype(int) * 10 # round ratio to nearest 10
# - now test
f_100 = ratio == 100
f_1 = ratio == 1
self.assertTrue((f_100 | f_1).all())
def test_repair_weekly_preSplit_100x(self):
    """Repair of 100x unit-mixup errors in weekly prices that also lack
    split-adjustment.

    PNL.L had a stock split in 2022; sometimes Yahoo returns pre-2022
    data without split-adjustment. Simulate that, inject 100x errors,
    and verify _fix_unit_mixups() still repairs them.
    """
    tkr = "PNL.L"
    dat = yf.Ticker(tkr, session=self.session)
    tz_exchange = dat.info["exchangeTimezoneName"]
    data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
    df = _pd.DataFrame(data={"Open": [400, 398, 392.5, 417],
                             "High": [421, 425, 419, 420.5],
                             "Low": [400, 380.5, 376.5, 396],
                             "Close": [410, 409.5, 402, 399],
                             "Adj Close": [398.02, 397.53, 390.25, 387.34],
                             "Volume": [3232600, 3773900, 10835000, 4257900]},
                       index=_pd.to_datetime([_dt.date(2020, 3, 30),
                                              _dt.date(2020, 3, 23),
                                              _dt.date(2020, 3, 16),
                                              _dt.date(2020, 3, 9)]))
    # Simulate data missing split-adjustment:
    df[data_cols] *= 100.0
    df["Volume"] *= 0.01
    df.index.name = "Date"
    # Create 100x errors:
    df_bad = df.copy()
    df_bad.loc["2020-03-30", "Close"] *= 100
    df_bad.loc["2020-03-23", "Low"] *= 100
    df_bad.loc["2020-03-09", "Open"] *= 100
    df.index = df.index.tz_localize(tz_exchange)
    df_bad.index = df_bad.index.tz_localize(tz_exchange)

    df_repaired = dat._fix_unit_mixups(df_bad, "1wk", tz_exchange)

    # First test - no errors left
    for c in data_cols:
        try:
            self.assertTrue(_np.isclose(df_repaired[c], df[c], rtol=1e-2).all())
        except AssertionError:
            # Was a bare `except:`; narrowed so KeyboardInterrupt etc.
            # aren't intercepted. Print context before re-raising so the
            # failing column is diagnosable.
            print("Mismatch in column", c)
            print("- df_repaired:")
            print(df_repaired[c])
            print("- answer:")
            print(df[c])
            raise

    # Second test - all differences should be either ~1x or ~100x
    ratio = df_bad[data_cols].values / df[data_cols].values
    ratio = ratio.round(2)
    # - round near-100 ratio to 100:
    f = ratio > 90
    ratio[f] = (ratio[f] / 10).round().astype(int) * 10  # round ratio to nearest 10
    # - now test
    f_100 = ratio == 100
    f_1 = ratio == 1
    self.assertTrue((f_100 | f_1).all())
def test_repair_daily_100x(self):
    # Yahoo sometimes reports daily prices 100x too large, suspected
    # GBp/GBP (or $/cents) mixup — e.g. ticker PNL.L. Corrupt a few cells
    # of a known-good table by 100x, then verify _fix_unit_mixups()
    # restores them.
    dat = yf.Ticker("PNL.L", session=self.session)
    tz = dat.info["exchangeTimezoneName"]
    price_cols = ["Low", "High", "Open", "Close", "Adj Close"]
    correct = _pd.DataFrame(data={"Open": [478, 476, 476, 472],
                                  "High": [478, 477.5, 477, 475],
                                  "Low": [474.02, 474, 473, 470.75],
                                  "Close": [475.5, 475.5, 474.5, 475],
                                  "Adj Close": [475.5, 475.5, 474.5, 475],
                                  "Volume": [436414, 485947, 358067, 287620]},
                            index=_pd.to_datetime([_dt.date(2022, 11, 1),
                                                   _dt.date(2022, 10, 31),
                                                   _dt.date(2022, 10, 28),
                                                   _dt.date(2022, 10, 27)]))
    correct.index.name = "Date"
    corrupted = correct.copy()
    for day, col in (("2022-11-01", "Close"),
                     ("2022-10-31", "Low"),
                     ("2022-10-27", "Open")):
        corrupted.loc[day, col] *= 100
    correct.index = correct.index.tz_localize(tz)
    corrupted.index = corrupted.index.tz_localize(tz)

    repaired = dat._fix_unit_mixups(corrupted, "1d", tz)

    # 1) every price column matches the correct table again
    for col in price_cols:
        self.assertTrue(_np.isclose(repaired[col], correct[col], rtol=1e-2).all())

    # 2) each corrupted/correct ratio must be ~1x or ~100x, nothing between
    ratio = (corrupted[price_cols].values / correct[price_cols].values).round(2)
    near_100 = ratio > 90
    ratio[near_100] = (ratio[near_100] / 10).round().astype(int) * 10  # snap to nearest 10
    self.assertTrue(((ratio == 100) | (ratio == 1)).all())
def test_repair_daily_zeroes(self):
    # Yahoo occasionally returns price=0.0 where the true price is clearly
    # non-zero, e.g. ticker BBIL.L. Verify _fix_zero_prices() reconstructs
    # the zeroed cells.
    ticker = yf.Ticker("BBIL.L", session=self.session)
    tz = ticker.info["exchangeTimezoneName"]
    df_bad = _pd.DataFrame(data={"Open": [0, 102.04, 102.04],
                                 "High": [0, 102.1, 102.11],
                                 "Low": [0, 102.04, 102.04],
                                 "Close": [103.03, 102.05, 102.08],
                                 "Adj Close": [102.03, 102.05, 102.08],
                                 "Volume": [560, 137, 117]},
                           index=_pd.to_datetime([_dt.datetime(2022, 11, 1),
                                                  _dt.datetime(2022, 10, 31),
                                                  _dt.datetime(2022, 10, 30)]))
    df_bad.index.name = "Date"
    df_bad.index = df_bad.index.tz_localize(tz)

    repaired_df = ticker._fix_zero_prices(df_bad, "1d", tz)

    # Expected reconstruction of the zeroed first row:
    correct_df = df_bad.copy()
    first_row = correct_df.index[0]
    for col, value in {"Open": 102.080002,
                       "Low": 102.032501,
                       "High": 102.080002}.items():
        correct_df.loc[first_row, col] = value

    for col in ["Open", "Low", "High", "Close"]:
        self.assertTrue(_np.isclose(repaired_df[col], correct_df[col], rtol=1e-8).all())
# Run the suite when invoked directly; the finally-clause guarantees the
# shared temp directory is removed even if unittest.main() raises or exits.
try:
    if __name__ == '__main__':
        unittest.main()
finally:
    # NOTE(review): 'td' is presumably a temporary-directory handle created
    # earlier in this file (outside this view) — confirm. cleanup() runs
    # unconditionally, including on import.
    td.cleanup()

# # Run tests sequentially:
# import inspect
# test_src = inspect.getsource(TestPriceHistory)
# unittest.TestLoader.sortTestMethodsUsing = lambda _, x, y: (
#     test_src.index(f"def {x}") - test_src.index(f"def {y}")
# )
# unittest.main(verbosity=2)

300
tests/ticker.py Normal file
View File

@@ -0,0 +1,300 @@
"""
Tests for Ticker
To run all tests in suite from commandline:
python -m unittest tests.ticker
Specific test class:
python -m unittest tests.ticker.TestTicker
"""
import pandas as pd
from .context import yfinance as yf
import unittest
import requests_cache
# Set this to see the exact requests that are made during tests
# NOTE(review): this defaults to True, so every test run emits DEBUG-level
# logging for all HTTP traffic — flip to False for quiet runs.
DEBUG_LOG_REQUESTS = True

if DEBUG_LOG_REQUESTS:
    import logging
    logging.basicConfig(level=logging.DEBUG)
class TestTicker(unittest.TestCase):
    # One cached HTTP session shared by all tests in this class.
    session = None

    @classmethod
    def setUpClass(cls):
        cls.session = requests_cache.CachedSession()

    @classmethod
    def tearDownClass(cls):
        if cls.session is not None:
            cls.session.close()

    def test_getTz(self):
        """Timezone lookup must succeed even when the ticker has been
        evicted from the tz-cache."""
        for symbol in ["IMP.JO", "BHG.JO", "SSW.JO", "BP.L", "INTC"]:
            # First step: remove ticker from tz-cache
            yf.utils.get_tz_cache().store(symbol, None)
            # Test:
            ticker = yf.Ticker(symbol, session=self.session)
            tz = ticker._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
            self.assertIsNotNone(tz)

    def test_badTicker(self):
        # Check yfinance doesn't die when ticker delisted
        symbol = "AM2Z.TA"
        ticker = yf.Ticker(symbol, session=self.session)
        ticker.history(period="1wk")
        ticker.history(start="2022-01-01")
        ticker.history(start="2022-01-01", end="2022-03-01")
        yf.download([symbol], period="1wk")
        # Touch every public accessor in turn; none should raise for a
        # delisted ticker.
        for attr in ("isin", "major_holders", "institutional_holders",
                     "mutualfund_holders", "dividends", "splits", "actions",
                     "shares", "info", "calendar", "recommendations",
                     "earnings", "quarterly_earnings", "income_stmt",
                     "quarterly_income_stmt", "balance_sheet",
                     "quarterly_balance_sheet", "cashflow",
                     "quarterly_cashflow", "recommendations_summary",
                     "analyst_price_target", "revenue_forecasts",
                     "sustainability", "options", "news", "earnings_trend",
                     "earnings_dates", "earnings_forecasts"):
            getattr(ticker, attr)
class TestTickerEarnings(unittest.TestCase):
    """Earnings-related Ticker accessors: each must return a non-empty
    DataFrame and cache the result between accesses."""

    def setUp(self):
        self.ticker = yf.Ticker("GOOGL")

    def tearDown(self):
        self.ticker = None

    def _check_cached_dataframe(self, attr_name):
        # Shared body for all tests below (was six copy-pasted blocks):
        # first access returns a non-empty DataFrame, second access must
        # return the identical (cached) object.
        data = getattr(self.ticker, attr_name)
        self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
        self.assertFalse(data.empty, "data is empty")
        data_cached = getattr(self.ticker, attr_name)
        self.assertIs(data, data_cached, "data not cached")

    def test_earnings_history(self):
        self._check_cached_dataframe("earnings_history")

    def test_earnings(self):
        self._check_cached_dataframe("earnings")

    def test_quarterly_earnings(self):
        self._check_cached_dataframe("quarterly_earnings")

    def test_earnings_forecasts(self):
        self._check_cached_dataframe("earnings_forecasts")

    def test_earnings_dates(self):
        self._check_cached_dataframe("earnings_dates")

    def test_earnings_trend(self):
        self._check_cached_dataframe("earnings_trend")
class TestTickerHolders(unittest.TestCase):
    """Holder-related Ticker accessors: each must return a non-empty
    DataFrame and cache the result between accesses."""

    def setUp(self):
        self.ticker = yf.Ticker("GOOGL")

    def tearDown(self):
        self.ticker = None

    def _check_cached_dataframe(self, attr_name):
        # Shared body for all tests below (was three copy-pasted blocks):
        # first access returns a non-empty DataFrame, second access must
        # return the identical (cached) object.
        data = getattr(self.ticker, attr_name)
        self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
        self.assertFalse(data.empty, "data is empty")
        data_cached = getattr(self.ticker, attr_name)
        self.assertIs(data, data_cached, "data not cached")

    def test_major_holders(self):
        self._check_cached_dataframe("major_holders")

    def test_institutional_holders(self):
        self._check_cached_dataframe("institutional_holders")

    def test_mutualfund_holders(self):
        self._check_cached_dataframe("mutualfund_holders")
class TestTickerMiscFinancials(unittest.TestCase):
    """Financial-statement and misc Ticker accessors: each must return a
    non-empty DataFrame (optionally containing an expected row) and cache
    the result between accesses."""

    def setUp(self):
        self.ticker = yf.Ticker("GOOGL")

    def tearDown(self):
        self.ticker = None

    def _check_cached_dataframe(self, attr_name, expected_row=None):
        # Shared body for the DataFrame tests below (was ten copy-pasted
        # blocks): first access returns a non-empty DataFrame, optionally
        # containing `expected_row` in its index; second access must return
        # the identical (cached) object.
        data = getattr(self.ticker, attr_name)
        self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
        self.assertFalse(data.empty, "data is empty")
        if expected_row is not None:
            self.assertIn(expected_row, data.index, "Did not find expected row in index")
        data_cached = getattr(self.ticker, attr_name)
        self.assertIs(data, data_cached, "data not cached")

    def test_balance_sheet(self):
        self._check_cached_dataframe("balance_sheet", expected_row="TotalAssets")

    def test_quarterly_balance_sheet(self):
        self._check_cached_dataframe("quarterly_balance_sheet", expected_row="TotalAssets")

    def test_cashflow(self):
        self._check_cached_dataframe("cashflow", expected_row="OperatingCashFlow")

    def test_quarterly_cashflow(self):
        self._check_cached_dataframe("quarterly_cashflow", expected_row="OperatingCashFlow")

    def test_sustainability(self):
        self._check_cached_dataframe("sustainability")

    def test_recommendations(self):
        self._check_cached_dataframe("recommendations")

    def test_recommendations_summary(self):
        self._check_cached_dataframe("recommendations_summary")

    def test_analyst_price_target(self):
        self._check_cached_dataframe("analyst_price_target")

    def test_revenue_forecasts(self):
        self._check_cached_dataframe("revenue_forecasts")

    def test_calendar(self):
        self._check_cached_dataframe("calendar")

    def test_isin(self):
        data = self.ticker.isin
        self.assertIsInstance(data, str, "data has wrong type")
        # NOTE(review): 'ARDEUT116159' looks like a CEDEAR-style ISIN rather
        # than GOOGL's primary US ISIN — confirm this is the intended result
        # of yfinance's ISIN lookup.
        self.assertEqual("ARDEUT116159", data, "data is empty")
        data_cached = self.ticker.isin
        self.assertIs(data, data_cached, "data not cached")

    def test_options(self):
        data = self.ticker.options
        self.assertIsInstance(data, tuple, "data has wrong type")
        self.assertTrue(len(data) > 1, "data is empty")
def suite():
    """Assemble all ticker test cases into one TestSuite.

    Bug fix: the previous implementation passed descriptive strings like
    'Test ticker' to each TestCase constructor; unittest interprets that
    argument as a test *method name*, so running the returned suite raised
    ValueError ("no such test method"). Load the real test methods via
    TestLoader instead.
    """
    loader = unittest.TestLoader()
    ts = unittest.TestSuite()
    for test_case in (TestTicker, TestTickerEarnings,
                      TestTickerHolders, TestTickerMiscFinancials):
        ts.addTests(loader.loadTestsFromTestCase(test_case))
    return ts


if __name__ == '__main__':
    unittest.main()

File diff suppressed because it is too large Load Diff

153
yfinance/data.py Normal file
View File

@@ -0,0 +1,153 @@
import datetime
import functools
from functools import lru_cache
import pandas as pd
import requests as requests
import re
from frozendict import frozendict
try:
import ujson as json
except ImportError:
import json as json
cache_maxsize = 64
def lru_cache_freezeargs(func):
    """
    Decorator that converts mutable dict arguments into immutable frozendicts,
    so an lru_cache-wrapped callable can hash calls that take dict arguments.
    Also re-exposes the wrapped function's cache introspection helpers.
    """
    @functools.wraps(func)
    def wrapped(*args, **kwargs):
        frozen_args = tuple(
            frozendict(a) if isinstance(a, dict) else a for a in args)
        frozen_kwargs = {
            key: frozendict(val) if isinstance(val, dict) else val
            for key, val in kwargs.items()}
        return func(*frozen_args, **frozen_kwargs)

    # Expose lru_cache's extra methods on the wrapper so callers can still
    # inspect/clear the cache after this decorator has been applied.
    wrapped.cache_info = func.cache_info
    wrapped.cache_clear = func.cache_clear
    return wrapped
class TickerData:
    """
    Have one place to retrieve data from Yahoo API in order to ease caching and speed up operations
    """
    # Browser-like User-Agent; sent with every request unless the caller
    # overrides it.
    user_agent_headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}

    def __init__(self, ticker: str, session=None):
        # session: any requests-compatible object (e.g. requests_cache
        # CachedSession); falls back to the plain requests module.
        self._ticker = ticker
        self._session = session or requests

    # NOTE(review): lru_cache on an instance method keys on `self` and keeps
    # every TickerData instance alive for the cache's lifetime; it also
    # memoizes error responses. Presumably a deliberate speed trade-off —
    # confirm.
    @lru_cache_freezeargs
    @lru_cache(maxsize=cache_maxsize)
    def get(self, url, user_agent_headers=None, params=None, proxy=None, timeout=30):
        """HTTP GET `url` via the configured session; result is memoized by
        the decorators above (dict args frozen so they are hashable)."""
        proxy = self._get_proxy(proxy)
        response = self._session.get(
            url=url,
            params=params,
            proxies=proxy,
            timeout=timeout,
            headers=user_agent_headers or self.user_agent_headers)
        return response

    def _get_proxy(self, proxy):
        # setup proxy in requests format: accept either a bare URL string or
        # a dict with an "https" key, normalise to {"https": url}.
        if proxy is not None:
            if isinstance(proxy, dict) and "https" in proxy:
                proxy = proxy["https"]
            proxy = {"https": proxy}
        return proxy

    @lru_cache_freezeargs
    @lru_cache(maxsize=cache_maxsize)
    def get_json_data_stores(self, url, proxy=None):
        '''
        get_json_data_stores returns a python dictionary of the data stores in yahoo finance web page.
        '''
        html = self.get(url=url, proxy=proxy).text

        # The stores live in a JS assignment 'root.App.main = {...};'
        # embedded in the page; slice the JSON literal out of the script.
        json_str = html.split('root.App.main =')[1].split(
            '(this)')[0].split(';\n}')[0].strip()
        data = json.loads(json_str)['context']['dispatcher']['stores']

        # return data
        # Collapse Yahoo's {'raw': x, 'fmt': ...} value objects down to the
        # raw value, and turn empty dicts into nulls, then re-parse.
        new_data = json.dumps(data).replace('{}', 'null')
        new_data = re.sub(
            r'{[\'|\"]raw[\'|\"]:(.*?),(.*?)}', r'\1', new_data)

        return json.loads(new_data)

    # Note cant use lru_cache as financials_data is a nested dict (freezeargs only handle flat dicts)
    def get_financials_time_series(self, timescale, financials_data, proxy=None):
        """Fetch fundamentals time-series data and reshape it into a table.

        :Parameters:
            timescale: 'annual' or 'quarterly'.
            financials_data: dict of Yahoo data stores; must contain
                'FinancialTemplateStore' (nested shape assumed from the
                recursive key search below — TODO confirm against caller).
            proxy: optional proxy passed through to self.get().

        Returns a DataFrame with one row per financial key (timescale
        prefix stripped, rows ordered as on the Yahoo website) and one
        column per report date, newest first.
        """
        acceptable_timestamps = ["annual", "quarterly"]
        if timescale not in acceptable_timestamps:
            raise Exception("timescale '{}' must be one of: {}".format(timescale, acceptable_timestamps))

        # Step 1: get the keys:
        def _finditem1(key, obj):
            # Recursively collect every value stored under `key` anywhere
            # in the nested dict/list structure.
            values = []
            if isinstance(obj, dict):
                if key in obj.keys():
                    values.append(obj[key])
                for k, v in obj.items():
                    values += _finditem1(key, v)
            elif isinstance(obj, list):
                for v in obj:
                    values += _finditem1(key, v)
            return values
        keys = _finditem1("key", financials_data['FinancialTemplateStore'])

        # Step 2: construct url:
        ts_url_base = "https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{0}?symbol={0}".format(
            self._ticker)
        if len(keys) == 0:
            raise Exception("Fetching keys failed")
        url = ts_url_base + "&type=" + ",".join([timescale + k for k in keys])
        # Yahoo returns maximum 4 years or 5 quarters, regardless of start_dt:
        start_dt = datetime.datetime(2016, 12, 31)
        end = (datetime.datetime.now() + datetime.timedelta(days=366))
        url += "&period1={}&period2={}".format(int(start_dt.timestamp()), int(end.timestamp()))

        # Step 3: fetch and reshape data
        json_str = self.get(url=url, proxy=proxy).text
        json_data = json.loads(json_str)
        data_raw = json_data["timeseries"]["result"]
        # data_raw = [v for v in data_raw if len(v) > 1]  # Discard keys with no data
        for d in data_raw:
            del d["meta"]

        # Now reshape data into a table:
        # Step 1: get columns and index:
        timestamps = set()
        data_unpacked = {}
        for x in data_raw:
            for k in x.keys():
                if k == "timestamp":
                    timestamps.update(x[k])
                else:
                    data_unpacked[k] = x[k]
        timestamps = sorted(list(timestamps))
        dates = pd.to_datetime(timestamps, unit="s")
        df = pd.DataFrame(columns=dates, index=list(data_unpacked.keys()))
        for k, v in data_unpacked.items():
            if df is None:
                # NOTE(review): unreachable — df is always constructed just
                # above and never reassigned to None. Dead branch.
                df = pd.DataFrame(columns=dates, index=[k])
            df.loc[k] = {pd.Timestamp(x["asOfDate"]): x["reportedValue"]["raw"] for x in v}

        # Strip the timescale prefix ('annual'/'quarterly') from row labels.
        df.index = df.index.str.replace("^" + timescale, "", regex=True)

        # Reorder table to match order on Yahoo website
        df = df.reindex([k for k in keys if k in df.index])
        df = df[sorted(df.columns, reverse=True)]

        return df

View File

@@ -29,10 +29,10 @@ from . import Ticker, utils
from . import shared
def download(tickers, start=None, end=None, actions=False, threads=True, ignore_tz=True,
group_by='column', auto_adjust=False, back_adjust=False, keepna=False,
def download(tickers, start=None, end=None, actions=False, threads=True, ignore_tz=True,
group_by='column', auto_adjust=False, back_adjust=False, repair=False, keepna=False,
progress=True, period="max", show_errors=True, interval="1d", prepost=False,
proxy=None, rounding=False, timeout=None, **kwargs):
proxy=None, rounding=False, timeout=10):
"""Download yahoo tickers
:Parameters:
tickers : str, list
@@ -56,6 +56,9 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
Default is False
auto_adjust: bool
Adjust all OHLC automatically? Default is False
repair: bool
Detect currency unit 100x mixups and attempt repair
Default is False
keepna: bool
Keep NaN rows returned by Yahoo?
Default is False
@@ -111,7 +114,7 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
_download_one_threaded(ticker, period=period, interval=interval,
start=start, end=end, prepost=prepost,
actions=actions, auto_adjust=auto_adjust,
back_adjust=back_adjust, keepna=keepna,
back_adjust=back_adjust, repair=repair, keepna=keepna,
progress=(progress and i > 0), proxy=proxy,
rounding=rounding, timeout=timeout)
while len(shared._DFS) < len(tickers):
@@ -123,7 +126,8 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
data = _download_one(ticker, period=period, interval=interval,
start=start, end=end, prepost=prepost,
actions=actions, auto_adjust=auto_adjust,
back_adjust=back_adjust, keepna=keepna, proxy=proxy,
back_adjust=back_adjust, repair=repair, keepna=keepna,
proxy=proxy,
rounding=rounding, timeout=timeout)
shared._DFS[ticker.upper()] = data
if progress:
@@ -141,7 +145,7 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
if ignore_tz:
for tkr in shared._DFS.keys():
if (shared._DFS[tkr] is not None) and (shared._DFS[tkr].shape[0]>0):
if (shared._DFS[tkr] is not None) and (shared._DFS[tkr].shape[0] > 0):
shared._DFS[tkr].index = shared._DFS[tkr].index.tz_localize(None)
if len(tickers) == 1:
@@ -191,12 +195,11 @@ def _realign_dfs():
@_multitasking.task
def _download_one_threaded(ticker, start=None, end=None,
auto_adjust=False, back_adjust=False,
auto_adjust=False, back_adjust=False, repair=False,
actions=False, progress=True, period="max",
interval="1d", prepost=False, proxy=None,
keepna=False, rounding=False, timeout=None):
data = _download_one(ticker, start, end, auto_adjust, back_adjust,
keepna=False, rounding=False, timeout=10):
data = _download_one(ticker, start, end, auto_adjust, back_adjust, repair,
actions, period, interval, prepost, proxy, rounding,
keepna, timeout)
shared._DFS[ticker.upper()] = data
@@ -205,14 +208,15 @@ def _download_one_threaded(ticker, start=None, end=None,
def _download_one(ticker, start=None, end=None,
auto_adjust=False, back_adjust=False,
auto_adjust=False, back_adjust=False, repair=False,
actions=False, period="max", interval="1d",
prepost=False, proxy=None, rounding=False,
keepna=False, timeout=None):
return Ticker(ticker).history(period=period, interval=interval,
start=start, end=end, prepost=prepost,
actions=actions, auto_adjust=auto_adjust,
back_adjust=back_adjust, proxy=proxy,
rounding=rounding, keepna=keepna, many=True,
timeout=timeout)
keepna=False, timeout=10):
return Ticker(ticker).history(
period=period, interval=interval,
start=start, end=end, prepost=prepost,
actions=actions, auto_adjust=auto_adjust,
back_adjust=back_adjust, repair=repair, proxy=proxy,
rounding=rounding, keepna=keepna, timeout=timeout,
debug=False, raise_errors=False # debug and raise_errors false to not log and raise errors in threads
)

View File

@@ -21,17 +21,11 @@
from __future__ import print_function
# import time as _time
import datetime as _datetime
import requests as _requests
import pandas as _pd
# import numpy as _np
# import json as _json
# import re as _re
from collections import namedtuple as _namedtuple
from . import utils
from .base import TickerBase
@@ -48,17 +42,7 @@ class Ticker(TickerBase):
url = "{}/v7/finance/options/{}?date={}".format(
self._base_url, self.ticker, date)
# setup proxy in requests format
if proxy is not None:
if isinstance(proxy, dict) and "https" in proxy:
proxy = proxy["https"]
proxy = {"https": proxy}
r = _requests.get(
url=url,
proxies=proxy,
headers=utils.user_agent_headers
).json()
r = self._data.get(url=url, proxy=proxy).json()
if len(r.get('optionChain', {}).get('result', [])) > 0:
for exp in r['optionChain']['result'][0]['expirationDates']:
self._expirations[_datetime.datetime.utcfromtimestamp(
@@ -163,37 +147,49 @@ class Ticker(TickerBase):
return self.get_earnings(freq='quarterly')
@property
def financials(self):
return self.get_financials()
def income_stmt(self):
return self.get_income_stmt()
@property
def quarterly_financials(self):
return self.get_financials(freq='quarterly')
def quarterly_income_stmt(self):
return self.get_income_stmt(freq='quarterly')
@property
def balance_sheet(self):
return self.get_balancesheet()
return self.get_balance_sheet()
@property
def quarterly_balance_sheet(self):
return self.get_balancesheet(freq='quarterly')
return self.get_balance_sheet(freq='quarterly')
@property
def balancesheet(self):
return self.get_balancesheet()
return self.balance_sheet
@property
def quarterly_balancesheet(self):
return self.get_balancesheet(freq='quarterly')
return self.quarterly_balance_sheet
@property
def cashflow(self):
return self.get_cashflow()
return self.get_cashflow(freq="yearly")
@property
def quarterly_cashflow(self):
return self.get_cashflow(freq='quarterly')
@property
def recommendations_summary(self):
return self.get_recommendations_summary()
@property
def analyst_price_target(self):
return self.get_analyst_price_target()
@property
def revenue_forecasts(self):
return self.get_rev_forecast()
@property
def sustainability(self):
return self.get_sustainability()
@@ -209,8 +205,8 @@ class Ticker(TickerBase):
return self.get_news()
@property
def analysis(self):
return self.get_analysis()
def earnings_trend(self):
return self.get_earnings_trend()
@property
def earnings_history(self):
@@ -219,3 +215,7 @@ class Ticker(TickerBase):
@property
def earnings_dates(self):
return self.get_earnings_dates()
@property
def earnings_forecasts(self):
return self.get_earnings_forecast()

View File

@@ -25,7 +25,7 @@ from . import Ticker, multi
# from collections import namedtuple as _namedtuple
class Tickers():
class Tickers:
def __repr__(self):
return 'yfinance.Tickers object <%s>' % ",".join(self.symbols)
@@ -46,27 +46,31 @@ class Tickers():
def history(self, period="1mo", interval="1d",
start=None, end=None, prepost=False,
actions=True, auto_adjust=True, proxy=None,
actions=True, auto_adjust=True, repair=False,
proxy=None,
threads=True, group_by='column', progress=True,
timeout=None, **kwargs):
timeout=10, **kwargs):
return self.download(
period, interval,
start, end, prepost,
actions, auto_adjust, proxy,
actions, auto_adjust, repair,
proxy,
threads, group_by, progress,
timeout, **kwargs)
def download(self, period="1mo", interval="1d",
start=None, end=None, prepost=False,
actions=True, auto_adjust=True, proxy=None,
actions=True, auto_adjust=True, repair=False,
proxy=None,
threads=True, group_by='column', progress=True,
timeout=None, **kwargs):
timeout=10, **kwargs):
data = multi.download(self.symbols,
start=start, end=end,
actions=actions,
auto_adjust=auto_adjust,
repair=repair,
period=period,
interval=interval,
prepost=prepost,

View File

@@ -22,6 +22,8 @@
from __future__ import print_function
import datetime as _datetime
from typing import Dict, Union
import pytz as _tz
import requests as _requests
import re as _re
@@ -30,13 +32,18 @@ import numpy as _np
import sys as _sys
import os as _os
import appdirs as _ad
import sqlite3 as _sqlite3
import atexit as _atexit
from threading import Lock
from pytz import UnknownTimeZoneError
try:
import ujson as _json
except ImportError:
import json as _json
user_agent_headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
@@ -46,7 +53,7 @@ def is_isin(string):
def get_all_by_isin(isin, proxy=None, session=None):
if not(is_isin(isin)):
if not (is_isin(isin)):
raise ValueError("Invalid ISIN number")
from .base import _BASE_URL_
@@ -85,7 +92,9 @@ def get_news_by_isin(isin, proxy=None, session=None):
return data.get('news', {})
def empty_df(index=[]):
def empty_df(index=None):
if index is None:
index = []
empty = _pd.DataFrame(index=index, data={
'Open': _np.nan, 'High': _np.nan, 'Low': _np.nan,
'Close': _np.nan, 'Adj Close': _np.nan, 'Volume': _np.nan})
@@ -100,39 +109,136 @@ def empty_earnings_dates_df():
return empty
def get_html(url, proxy=None, session=None):
session = session or _requests
html = session.get(url=url, proxies=proxy, headers=user_agent_headers).text
return html
def build_template(data):
'''
build_template returns the details required to rebuild any of the yahoo finance financial statements in the same order as the yahoo finance webpage. The function is built to be used on the "FinancialTemplateStore" json which appears in any one of the three yahoo finance webpages: "/financials", "/cash-flow" and "/balance-sheet".
Returns:
- template_annual_order: The order that annual figures should be listed in.
- template_ttm_order: The order that TTM (Trailing Twelve Month) figures should be listed in.
- template_order: The order that quarterlies should be in (note that quarterlies have no pre-fix - hence why this is required).
- level_detail: The level of each individual line item. E.g. for the "/financials" webpage, "Total Revenue" is a level 0 item and is the summation of "Operating Revenue" and "Excise Taxes" which are level 1 items.
'''
template_ttm_order = [] # Save the TTM (Trailing Twelve Months) ordering to an object.
template_annual_order = [] # Save the annual ordering to an object.
template_order = [] # Save the ordering to an object (this can be utilized for quarterlies)
level_detail = [] # Record the level of each line item of the income statement ("Operating Revenue" and "Excise Taxes" sum to return "Total Revenue" we need to keep track of this)
for key in data['template']:
# Loop through the json to retreive the exact financial order whilst appending to the objects
template_ttm_order.append('trailing{}'.format(key['key']))
template_annual_order.append('annual{}'.format(key['key']))
template_order.append('{}'.format(key['key']))
level_detail.append(0)
if 'children' in key:
for child1 in key['children']: # Level 1
template_ttm_order.append('trailing{}'.format(child1['key']))
template_annual_order.append('annual{}'.format(child1['key']))
template_order.append('{}'.format(child1['key']))
level_detail.append(1)
if 'children' in child1:
for child2 in child1['children']: # Level 2
template_ttm_order.append('trailing{}'.format(child2['key']))
template_annual_order.append('annual{}'.format(child2['key']))
template_order.append('{}'.format(child2['key']))
level_detail.append(2)
if 'children' in child2:
for child3 in child2['children']: # Level 3
template_ttm_order.append('trailing{}'.format(child3['key']))
template_annual_order.append('annual{}'.format(child3['key']))
template_order.append('{}'.format(child3['key']))
level_detail.append(3)
if 'children' in child3:
for child4 in child3['children']: # Level 4
template_ttm_order.append('trailing{}'.format(child4['key']))
template_annual_order.append('annual{}'.format(child4['key']))
template_order.append('{}'.format(child4['key']))
level_detail.append(4)
if 'children' in child4:
for child5 in child4['children']: # Level 5
template_ttm_order.append('trailing{}'.format(child5['key']))
template_annual_order.append('annual{}'.format(child5['key']))
template_order.append('{}'.format(child5['key']))
level_detail.append(5)
return template_ttm_order, template_annual_order, template_order, level_detail
def get_json(url, proxy=None, session=None):
session = session or _requests
html = session.get(url=url, proxies=proxy, headers=user_agent_headers).text
def retreive_financial_details(data):
'''
retreive_financial_details returns all of the available financial details under the "QuoteTimeSeriesStore" for any of the following three yahoo finance webpages: "/financials", "/cash-flow" and "/balance-sheet".
if "QuoteSummaryStore" not in html:
html = session.get(url=url, proxies=proxy).text
if "QuoteSummaryStore" not in html:
return {}
Returns:
- TTM_dicts: A dictionary full of all of the available Trailing Twelve Month figures, this can easily be converted to a pandas dataframe.
- Annual_dicts: A dictionary full of all of the available Annual figures, this can easily be converted to a pandas dataframe.
'''
TTM_dicts = [] # Save a dictionary object to store the TTM financials.
Annual_dicts = [] # Save a dictionary object to store the Annual financials.
for key in data['timeSeries']: # Loop through the time series data to grab the key financial figures.
try:
if len(data['timeSeries'][key]) > 0:
time_series_dict = {}
time_series_dict['index'] = key
for each in data['timeSeries'][key]: # Loop through the years
if each == None:
continue
else:
time_series_dict[each['asOfDate']] = each['reportedValue']
# time_series_dict["{}".format(each['asOfDate'])] = data['timeSeries'][key][each]['reportedValue']
if 'trailing' in key:
TTM_dicts.append(time_series_dict)
elif 'annual' in key:
Annual_dicts.append(time_series_dict)
except Exception as e:
pass
return TTM_dicts, Annual_dicts
json_str = html.split('root.App.main =')[1].split(
'(this)')[0].split(';\n}')[0].strip()
data = _json.loads(json_str)[
'context']['dispatcher']['stores']['QuoteSummaryStore']
# add data about Shares Outstanding for companies' tickers if they are available
try:
data['annualBasicAverageShares'] = _json.loads(
json_str)['context']['dispatcher']['stores'][
'QuoteTimeSeriesStore']['timeSeries']['annualBasicAverageShares']
except Exception:
pass
# return data
new_data = _json.dumps(data).replace('{}', 'null')
new_data = _re.sub(
r'\{[\'|\"]raw[\'|\"]:(.*?),(.*?)\}', r'\1', new_data)
def format_annual_financial_statement(level_detail, annual_dicts, annual_order, ttm_dicts=None, ttm_order=None):
    '''
    format_annual_financial_statement formats any annual financial statement.

    Parameters:
     - level_detail: List of detail levels, becomes the 'level_detail' index level.
     - annual_dicts / annual_order: Annual figures (from retreive_financial_details) and row order.
     - ttm_dicts / ttm_order: Optional TTM figures and row order; balance sheet has none.

    Returns:
     - _statement: A fully formatted annual financial statement in pandas dataframe.
    '''
    Annual = _pd.DataFrame.from_dict(annual_dicts).set_index("index")
    Annual = Annual.reindex(annual_order)
    Annual.index = Annual.index.str.replace(r'annual', '')

    # Note: balance sheet is the only financial statement with no ttm detail
    if (ttm_dicts not in [[], None]) and (ttm_order not in [[], None]):
        TTM = _pd.DataFrame.from_dict(ttm_dicts).set_index("index")
        TTM = TTM.reindex(ttm_order)
        # Add 'TTM' prefix to all column names, so if combined we can tell
        # the difference between actuals and TTM (similar to yahoo finance).
        TTM.columns = ['TTM ' + str(col) for col in TTM.columns]
        TTM.index = TTM.index.str.replace(r'trailing', '')
        _statement = Annual.merge(TTM, left_index=True, right_index=True)
    else:
        _statement = Annual

    _statement.index = camel2title(_statement.T)
    _statement['level_detail'] = level_detail
    _statement = _statement.set_index([_statement.index, 'level_detail'])
    _statement = _statement[sorted(_statement.columns, reverse=True)]
    _statement = _statement.dropna(how='all')
    return _statement
def format_quarterly_financial_statement(_statement, level_detail, order):
    '''
    format_quarterly_financial_statements formats any quarterly financial statement.

    Parameters:
     - _statement: Raw quarterly statement dataframe to format (mutated in place and reassigned).
     - level_detail: List of detail levels, becomes the 'level_detail' index level.
     - order: Desired row order for reindexing.

    Returns:
     - _statement: A fully formatted quarterly financial statement in pandas dataframe.
    '''
    _statement = _statement.reindex(order)
    # Convert camelCase row labels to title-case via the module helper.
    _statement.index = camel2title(_statement.T)
    _statement['level_detail'] = level_detail
    # Two-level index: (row label, level_detail).
    _statement = _statement.set_index([_statement.index, 'level_detail'])
    # Newest period first (columns are date strings, sorted descending).
    _statement = _statement[sorted(_statement.columns, reverse=True)]
    _statement = _statement.dropna(how='all')
    # Column labels arrive as date strings; convert to datetime.date objects.
    _statement.columns = _pd.to_datetime(_statement.columns).date
    return _statement
def camel2title(o):
@@ -141,7 +247,7 @@ def camel2title(o):
def _parse_user_dt(dt, exchange_tz):
if isinstance(dt, int):
## Should already be epoch, test with conversion:
# Should already be epoch, test with conversion:
_datetime.datetime.fromtimestamp(dt)
else:
# Convert str/date -> datetime, set tzinfo=exchange, get timestamp:
@@ -246,25 +352,230 @@ def parse_actions(data):
splits.index = _pd.to_datetime(splits.index, unit="s")
splits.sort_index(inplace=True)
splits["Stock Splits"] = splits["numerator"] / \
splits["denominator"]
splits = splits["Stock Splits"]
splits["denominator"]
splits = splits[["Stock Splits"]]
return dividends, splits
def set_df_tz(df, interval, tz):
if df.index.tz is None:
df.index = df.index.tz_localize("UTC")
df.index = df.index.tz_convert(tz)
return df
def fix_Yahoo_returning_live_separate(quotes, interval, tz_exchange):
    """Merge Yahoo's separate 'live' row into the preceding interval row.

    Returns the (possibly shortened) quotes dataframe.
    """
    # Yahoo bug fix. If market is open today then Yahoo normally returns
    # todays data as a separate row from rest-of week/month interval in above row.
    # Seems to depend on what exchange e.g. crypto OK.
    # Fix = merge them together
    n = quotes.shape[0]
    if n > 1:
        # Compare the last two rows in exchange-local time so that
        # week/month membership is judged in market time, not UTC.
        dt1 = quotes.index[n - 1]
        dt2 = quotes.index[n - 2]
        if quotes.index.tz is None:
            dt1 = dt1.tz_localize("UTC")
            dt2 = dt2.tz_localize("UTC")
        dt1 = dt1.tz_convert(tz_exchange)
        dt2 = dt2.tz_convert(tz_exchange)
        if interval == "1d":
            # Similar bug in daily data except most data is simply duplicated
            # - exception is volume, *slightly* greater on final row (and matches website)
            if dt1.date() == dt2.date():
                # Last two rows are on same day. Drop second-to-last row
                quotes = quotes.drop(quotes.index[n - 2])
        else:
            if interval == "1wk":
                last_rows_same_interval = dt1.year == dt2.year and dt1.week == dt2.week
            elif interval == "1mo":
                last_rows_same_interval = dt1.month == dt2.month
            elif interval == "3mo":
                last_rows_same_interval = dt1.year == dt2.year and dt1.quarter == dt2.quarter
            else:
                last_rows_same_interval = False

            if last_rows_same_interval:
                # Last two rows are within same interval:
                # fold the live row (idx1) into the interval row (idx2).
                idx1 = quotes.index[n - 1]
                idx2 = quotes.index[n - 2]
                if _np.isnan(quotes.loc[idx2, "Open"]):
                    quotes.loc[idx2, "Open"] = quotes["Open"][n - 1]
                # Note: nanmax() & nanmin() ignores NaNs
                # NOTE(review): `quotes["High"][n - 1]` relies on positional
                # fallback of Series integer indexing - deprecated in newer
                # pandas; confirm behavior or switch to .iloc.
                quotes.loc[idx2, "High"] = _np.nanmax([quotes["High"][n - 1], quotes["High"][n - 2]])
                quotes.loc[idx2, "Low"] = _np.nanmin([quotes["Low"][n - 1], quotes["Low"][n - 2]])
                quotes.loc[idx2, "Close"] = quotes["Close"][n - 1]
                if "Adj High" in quotes.columns:
                    quotes.loc[idx2, "Adj High"] = _np.nanmax([quotes["Adj High"][n - 1], quotes["Adj High"][n - 2]])
                if "Adj Low" in quotes.columns:
                    quotes.loc[idx2, "Adj Low"] = _np.nanmin([quotes["Adj Low"][n - 1], quotes["Adj Low"][n - 2]])
                if "Adj Close" in quotes.columns:
                    quotes.loc[idx2, "Adj Close"] = quotes["Adj Close"][n - 1]
                # Volumes accumulate across the interval; then drop the live row.
                quotes.loc[idx2, "Volume"] += quotes["Volume"][n - 1]
                quotes = quotes.drop(quotes.index[n - 1])
    return quotes
def safe_merge_dfs(df_main, df_sub, interval):
    """Merge event data `df_sub` (one new column, e.g. Dividends) onto prices `df_main`.

    Tries a naive index join, then progressively repairs df_sub's index to
    line up with df_main's interval starts. Raises if data is still lost
    (except intraday, where unmatched events are appended with NaN prices).
    """
    # Carefully merge 'df_sub' onto 'df_main'
    # If naive merge fails, try again with reindexing df_sub:
    # 1) if interval is weekly or monthly, then try with index set to start of week/month
    # 2) if still failing then manually search through df_main.index to reindex df_sub
    if df_sub.shape[0] == 0:
        raise Exception("No data to merge")

    df_sub_backup = df_sub.copy()
    # Exactly one column of df_sub may be new relative to df_main.
    data_cols = [c for c in df_sub.columns if c not in df_main]
    if len(data_cols) > 1:
        raise Exception("Expected 1 data col")
    data_col = data_cols[0]

    def _reindex_events(df, new_index, data_col_name):
        # Assign new_index to df, aggregating rows that collapse onto the
        # same new index value (dividends add, splits multiply).
        if len(new_index) == len(set(new_index)):
            # No duplicates, easy
            df.index = new_index
            return df
        df["_NewIndex"] = new_index
        # Duplicates present within periods but can aggregate
        if data_col_name == "Dividends":
            # Add
            df = df.groupby("_NewIndex").sum()
            df.index.name = None
        elif data_col_name == "Stock Splits":
            # Product
            df = df.groupby("_NewIndex").prod()
            df.index.name = None
        else:
            raise Exception("New index contains duplicates but unsure how to aggregate for '{}'".format(data_col_name))
        if "_NewIndex" in df.columns:
            df = df.drop("_NewIndex", axis=1)
        return df

    # Attempt 1: naive left-join on index.
    df = df_main.join(df_sub)
    f_na = df[data_col].isna()
    # Data is "lost" if fewer non-NaN event rows survived than df_sub had.
    data_lost = sum(~f_na) < df_sub.shape[0]
    if not data_lost:
        return df
    # Lost data during join()

    # Attempt 2: backdate all df_sub.index dates to start of week/month/quarter.
    if interval == "1wk":
        new_index = _pd.PeriodIndex(df_sub.index, freq='W').to_timestamp()
    elif interval == "1mo":
        new_index = _pd.PeriodIndex(df_sub.index, freq='M').to_timestamp()
    elif interval == "3mo":
        new_index = _pd.PeriodIndex(df_sub.index, freq='Q').to_timestamp()
    else:
        new_index = None
    if new_index is not None:
        # PeriodIndex drops tz; restore df's tz (ambiguous=True for DST overlaps).
        new_index = new_index.tz_localize(df.index.tz, ambiguous=True)
        df_sub = _reindex_events(df_sub, new_index, data_col)
        df = df_main.join(df_sub)

    f_na = df[data_col].isna()
    data_lost = sum(~f_na) < df_sub.shape[0]
    if not data_lost:
        return df

    # Attempt 3: lost data during join(). Manually check each df_sub.index date
    # against df_main.index to find matching interval.
    df_sub = df_sub_backup.copy()
    new_index = [-1] * df_sub.shape[0]
    for i in range(df_sub.shape[0]):
        dt_sub_i = df_sub.index[i]
        if dt_sub_i in df_main.index:
            # Already aligned, keep as-is.
            new_index[i] = dt_sub_i
            continue
        # Found a bad index date, need to search for near-match in df_main (same week/month)
        fixed = False
        for j in range(df_main.shape[0] - 1):
            dt_main_j0 = df_main.index[j]
            dt_main_j1 = df_main.index[j + 1]
            if (dt_main_j0 <= dt_sub_i) and (dt_sub_i < dt_main_j1):
                fixed = True
                if interval.endswith('h') or interval.endswith('m'):
                    # Must also be same day
                    fixed = (dt_main_j0.date() == dt_sub_i.date()) and (dt_sub_i.date() == dt_main_j1.date())
                if fixed:
                    dt_sub_i = dt_main_j0
                break
        if not fixed:
            # Event falls after the last price row; snap it to the last row
            # if it belongs to the same interval.
            last_main_dt = df_main.index[df_main.shape[0] - 1]
            # NOTE(review): `diff` is computed but never used below.
            diff = dt_sub_i - last_main_dt
            if interval == "1mo" and last_main_dt.month == dt_sub_i.month:
                dt_sub_i = last_main_dt
                fixed = True
            elif interval == "3mo" and last_main_dt.year == dt_sub_i.year and last_main_dt.quarter == dt_sub_i.quarter:
                dt_sub_i = last_main_dt
                fixed = True
            elif interval == "1wk":
                if last_main_dt.week == dt_sub_i.week:
                    dt_sub_i = last_main_dt
                    fixed = True
                elif (dt_sub_i >= last_main_dt) and (dt_sub_i - last_main_dt < _datetime.timedelta(weeks=1)):
                    # With some specific start dates (e.g. around early Jan), Yahoo
                    # messes up start-of-week, is Saturday not Monday. So check
                    # if same week another way
                    dt_sub_i = last_main_dt
                    fixed = True
            elif interval == "1d" and last_main_dt.day == dt_sub_i.day:
                dt_sub_i = last_main_dt
                fixed = True
            elif interval == "1h" and last_main_dt.hour == dt_sub_i.hour:
                dt_sub_i = last_main_dt
                fixed = True
            elif interval.endswith('m') or interval.endswith('h'):
                td = _pd.to_timedelta(interval)
                if (dt_sub_i >= last_main_dt) and (dt_sub_i - last_main_dt < td):
                    dt_sub_i = last_main_dt
                    fixed = True
        new_index[i] = dt_sub_i
    df_sub = _reindex_events(df_sub, new_index, data_col)
    df = df_main.join(df_sub)

    f_na = df[data_col].isna()
    data_lost = sum(~f_na) < df_sub.shape[0]
    if data_lost:
        ## Not always possible to match events with trading, e.g. when released pre-market.
        ## So have to append to bottom with nan prices.
        ## But should only be impossible with intra-day price data.
        if interval.endswith('m') or interval.endswith('h'):
            f_missing = ~df_sub.index.isin(df.index)
            df_sub_missing = df_sub[f_missing]
            keys = {"Adj Open", "Open", "Adj High", "High", "Adj Low", "Low", "Adj Close",
                    "Close"}.intersection(df.columns)
            # Unmatched events get NaN price columns and are appended.
            # NOTE(review): assignment on a slice may trigger SettingWithCopyWarning - confirm intended.
            df_sub_missing[list(keys)] = _np.nan
            df = _pd.concat([df, df_sub_missing], sort=True)
        else:
            raise Exception("Lost data during merge despite all attempts to align data (see above)")
    return df
def fix_Yahoo_dst_issue(df, interval):
if interval in ["1d","1w","1wk"]:
if interval in ["1d", "1w", "1wk"]:
# These intervals should start at time 00:00. But for some combinations of date and timezone,
# Yahoo has time off by few hours (e.g. Brazil 23:00 around Jan-2022). Suspect DST problem.
# The clue is (a) minutes=0 and (b) hour near 0.
# Obviously Yahoo meant 00:00, so ensure this doesn't affect date conversion:
f_pre_midnight = (df.index.minute == 0) & (df.index.hour.isin([22,23]))
dst_error_hours = _np.array([0]*df.shape[0])
dst_error_hours[f_pre_midnight] = 24-df.index[f_pre_midnight].hour
f_pre_midnight = (df.index.minute == 0) & (df.index.hour.isin([22, 23]))
dst_error_hours = _np.array([0] * df.shape[0])
dst_error_hours[f_pre_midnight] = 24 - df.index[f_pre_midnight].hour
df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
return df
def is_valid_timezone(tz: str) -> bool:
    """Return True if `tz` names a timezone known to the tz database."""
    try:
        _tz.timezone(tz)
        return True
    except UnknownTimeZoneError:
        return False
class ProgressBar:
def __init__(self, iterations, text='completed'):
self.text = text
@@ -305,54 +616,167 @@ class ProgressBar:
all_full = self.width - 2
num_hashes = int(round((percent_done / 100.0) * all_full))
self.prog_bar = '[' + self.fill_char * \
num_hashes + ' ' * (all_full - num_hashes) + ']'
num_hashes + ' ' * (all_full - num_hashes) + ']'
pct_place = (len(self.prog_bar) // 2) - len(str(percent_done))
pct_string = '%d%%' % percent_done
self.prog_bar = self.prog_bar[0:pct_place] + \
(pct_string + self.prog_bar[pct_place + len(pct_string):])
(pct_string + self.prog_bar[pct_place + len(pct_string):])
def __str__(self):
    # Render the progress bar exactly as last built into self.prog_bar.
    return str(self.prog_bar)
# Simple file cache of ticker->timezone:
# Optional override of the cache parent directory; None means use the
# platform default from _ad.user_cache_dir().
# NOTE(review): this CSV-based cache appears superseded by the sqlite-backed
# _TzCache defined later in this file - looks like merge residue; confirm.
_cache_dp = None
def get_cache_dirpath():
    """Return the "py-yfinance" cache folder path, honoring the _cache_dp override."""
    base = _ad.user_cache_dir() if _cache_dp is None else _cache_dp
    return _os.path.join(base, "py-yfinance")
def set_tz_cache_location(dp):
    # Override the parent directory used by get_cache_dirpath().
    # NOTE(review): a second set_tz_cache_location is defined later in this
    # file and wins at import time - this one looks like merge residue; confirm.
    global _cache_dp
    _cache_dp = dp
# ---------------------------------
# TimeZone cache related code
# ---------------------------------
def cache_lookup_tkr_tz(tkr):
fp = _os.path.join(get_cache_dirpath(), "tkr-tz.csv")
if not _os.path.isfile(fp):
class _KVStore:
    """Simple Sqlite backed key/value store, key and value are strings. Should be thread safe."""

    def __init__(self, filename):
        # Mutex serializes writes; the connection is shared across threads
        # (check_same_thread=False).
        self._cache_mutex = Lock()
        with self._cache_mutex:
            self.conn = _sqlite3.connect(filename, timeout=10, check_same_thread=False)
            # WAL journal allows concurrent readers while a write is in flight.
            self.conn.execute('pragma journal_mode=wal')
            self.conn.execute('create table if not exists "kv" (key TEXT primary key, value TEXT) without rowid')
            self.conn.commit()
        # Ensure the connection is closed on interpreter exit.
        _atexit.register(self.close)

    def close(self):
        """Close the sqlite connection (idempotent)."""
        if self.conn is not None:
            with self._cache_mutex:
                self.conn.close()
                self.conn = None

    def get(self, key: str) -> Union[str, None]:
        """Get value for key if it exists else returns None"""
        # NOTE(review): reads are not mutex-protected; sqlite's own locking is
        # relied upon here - confirm acceptable for concurrent use.
        item = self.conn.execute('select value from "kv" where key=?', (key,))
        # Fix: cursor objects are always truthy, so the previous `if item:`
        # guard was dead code - fetch directly.
        return next(item, (None,))[0]

    def set(self, key: str, value: str) -> None:
        """Insert or overwrite the value for key."""
        with self._cache_mutex:
            self.conn.execute('replace into "kv" (key, value) values (?,?)', (key, value))
            self.conn.commit()

    def bulk_set(self, kvdata: Dict[str, str]):
        """Insert or overwrite many key/value pairs in one transaction."""
        records = tuple(i for i in kvdata.items())
        with self._cache_mutex:
            self.conn.executemany('replace into "kv" (key, value) values (?,?)', records)
            self.conn.commit()

    def delete(self, key: str):
        """Remove key from the store (no-op if absent)."""
        with self._cache_mutex:
            self.conn.execute('delete from "kv" where key=?', (key,))
            self.conn.commit()
class _TzCacheException(Exception):
    """Raised when the tz-cache folder cannot be created or is not readable/writable."""
    pass
class _TzCache:
    """Simple sqlite file cache of ticker->timezone"""

    def __init__(self):
        # _KVStore is created lazily via the tz_db property.
        self._tz_db = None
        self._setup_cache_folder()

    def _setup_cache_folder(self):
        # Create the cache folder, or verify an existing one is usable.
        # Raises _TzCacheException on failure so callers can fall back
        # to the dummy cache.
        if not _os.path.isdir(self._db_dir):
            try:
                _os.makedirs(self._db_dir)
            except OSError as err:
                raise _TzCacheException("Error creating TzCache folder: '{}' reason: {}"
                                        .format(self._db_dir, err))
        elif not (_os.access(self._db_dir, _os.R_OK) and _os.access(self._db_dir, _os.W_OK)):
            raise _TzCacheException("Cannot read and write in TzCache folder: '{}'"
                                    .format(self._db_dir, ))

    def lookup(self, tkr):
        # Returns cached timezone string for tkr, or None on a miss.
        return self.tz_db.get(tkr)

    def store(self, tkr, tz):
        # tz=None deletes the entry; storing over an existing entry raises.
        if tz is None:
            self.tz_db.delete(tkr)
        elif self.tz_db.get(tkr) is not None:
            raise Exception("Tkr {} tz already in cache".format(tkr))
        else:
            self.tz_db.set(tkr, tz)

    @property
    def _db_dir(self):
        # Cache folder lives under the module-level _cache_dir root.
        global _cache_dir
        return _os.path.join(_cache_dir, "py-yfinance")

    @property
    def tz_db(self):
        # lazy init
        if self._tz_db is None:
            self._tz_db = _KVStore(_os.path.join(self._db_dir, "tkr-tz.db"))
            self._migrate_cache_tkr_tz()
        return self._tz_db

    def _migrate_cache_tkr_tz(self):
        """Migrate contents from old ticker CSV-cache to SQLite db"""
        fp = _os.path.join(self._db_dir, "tkr-tz.csv")
        if not _os.path.isfile(fp):
            return None
        df = _pd.read_csv(fp, index_col="Ticker")
        # Bulk-copy the Ticker->Tz mapping, then remove the legacy CSV.
        self.tz_db.bulk_set(df.to_dict()['Tz'])
        _os.remove(fp)
class _TzCacheDummy:
    """Dummy cache to use if tz cache is disabled"""
    # NOTE: the merged view interleaved this class with leftover CSV-cache
    # statements; the residue has been removed, restoring the clean class.

    def lookup(self, tkr):
        # No cache: every lookup is a miss.
        return None

    def store(self, tkr, tz):
        # No cache: silently discard.
        pass

    @property
    def tz_db(self):
        # No backing database.
        return None
def cache_store_tkr_tz(tkr, tz):
    """Append ticker->timezone to the legacy CSV cache.

    Creates the cache folder/file on first use; raises if tkr is already cached.
    NOTE: the merge view scattered this function's already-exists branch into
    get_tz_cache() below; reconstructed here from that residue.
    """
    df = _pd.DataFrame({"Ticker": [tkr], "Tz": [tz]})
    dp = get_cache_dirpath()
    if not _os.path.isdir(dp):
        _os.makedirs(dp)
    fp = _os.path.join(dp, "tkr-tz.csv")
    if not _os.path.isfile(fp):
        # First entry: create the CSV.
        df.to_csv(fp, index=False)
        return
    df_all = _pd.read_csv(fp)
    f = df_all["Ticker"] == tkr
    if sum(f) > 0:
        raise Exception("Tkr {} tz already in cache".format(tkr))
    _pd.concat([df_all, df]).to_csv(fp, index=False)
def get_tz_cache():
    """
    Get the timezone cache, initializes it and creates cache folder if needed on first call.
    If folder cannot be created for some reason it will fall back to initialize a
    dummy cache with same interface as real cache.
    """
    # NOTE: the merged view embedded leftover CSV-cache statements inside this
    # function; the residue has been removed, restoring the clean body.
    # as this can be called from multiple threads, protect it.
    with _cache_init_lock:
        global _tz_cache
        if _tz_cache is None:
            try:
                _tz_cache = _TzCache()
            except _TzCacheException as err:
                # Fall back to a no-op cache rather than failing ticker fetches.
                print("Failed to create TzCache, reason: {}".format(err))
                print("TzCache will not be used.")
                print("Tip: You can direct cache to use a different location with 'set_tz_cache_location(mylocation)'")
                _tz_cache = _TzCacheDummy()
    return _tz_cache
# Root folder for the "py-yfinance" cache; overridable via set_tz_cache_location().
_cache_dir = _ad.user_cache_dir()
# Protects the lazy one-time initialization of _tz_cache across threads.
_cache_init_lock = Lock()
# Singleton _TzCache/_TzCacheDummy instance, created lazily by get_tz_cache().
_tz_cache = None
def set_tz_cache_location(cache_dir: str):
    """
    Sets the path to create the "py-yfinance" cache folder in.
    Useful if the default folder returned by "appdir.user_cache_dir()" is not writable.
    Must be called before cache is used (that is, before fetching tickers).
    :param cache_dir: Path to use for caches
    :return: None
    """
    global _cache_dir, _tz_cache
    # Explicit check instead of `assert` so the validation survives `python -O`
    # (asserts are stripped under optimized mode). Same exception type raised.
    if _tz_cache is not None:
        raise AssertionError(
            "Time Zone cache already initialized, setting path must be done before cache is created")
    _cache_dir = cache_dir

View File

@@ -1 +1 @@
version = "0.1.81"
version = "0.2.0rc2"