Compare commits

...

97 Commits

Author SHA1 Message Date
ValueRaider
620e29cf05 Fix valuations table construction 2023-01-21 14:36:27 +00:00
ValueRaider
b759ef03ca Add Ticker.valuations 2023-01-21 14:24:46 +00:00
ValueRaider
fb9c72c35e Implement scraping for 'key-statistics' via 'Ticker.stats' 2023-01-21 14:06:57 +00:00
ValueRaider
5d9a91da4a Improve 'get_shares_full()' error handling ; Minor fixes 2023-01-14 22:44:54 +00:00
ValueRaider
47c579ff22 Merge pull request #1297 from alexa-infra/fix-stores-decryption
Fix stores decrypt
2023-01-14 20:06:52 +00:00
ValueRaider
caf5cba801 Merge pull request #1301 from ranaroussi/feature/share-count
Feature/share count
2023-01-14 19:53:45 +00:00
ValueRaider
486c7894ce get_shares_full(): convert to pd.Series, add test 2023-01-14 17:32:54 +00:00
ValueRaider
db8a00edae get_shares_full(): remove caching, tidy API 2023-01-14 17:11:57 +00:00
ValueRaider
805523b924 Fix 'get_shares_full()' post-rebase 2023-01-14 16:58:58 +00:00
ValueRaider
32ab2e648d get_shares_full() set default range 1yr 2023-01-14 16:35:54 +00:00
ValueRaider
4d91ae740a Add date args to 'shares_full()' and caching 2023-01-14 16:35:54 +00:00
ValueRaider
05ec4b4312 Add full share count history via 'shares_full' 2023-01-14 16:35:51 +00:00
ValueRaider
cd2c1ada14 Improve decrypt key deduction 2023-01-14 15:41:33 +00:00
ValueRaider
4ca9642403 Ensure 'requests_cache' responses processed ; Improve naming 2023-01-14 14:20:40 +00:00
Alexey Vasilyev
b438f29a71 Fix decryption 2023-01-14 08:06:35 +01:00
ValueRaider
4db178b8d6 Merge pull request #1284 from ranaroussi/fix/financials-caching
Improve caching of financials data
2023-01-12 11:47:04 +00:00
ValueRaider
38637a9821 Merge pull request #1283 from DE0CH/ignore-tz-false
Change default value to ignore_tz to False
2023-01-08 12:45:00 +00:00
Deyao Chen
de8c0bdcdd Change default value to ignore_tz to False
Bring the behavior of download() to be the same as 0.1.77.
2023-01-08 11:47:13 +08:00
ValueRaider
fd35975cf9 Improve caching of financials data 2023-01-07 18:02:16 +00:00
ValueRaider
1495834a09 Merge pull request #1276 from gogog22510/main
Fix the database lock error in multithread download
2023-01-04 23:10:22 +00:00
ValueRaider
2a7588dead Tidy DB lock fix 2023-01-04 21:32:54 +00:00
gogog22510
051de748b9 Fix the database lock error in multithread download 2023-01-04 12:37:59 -05:00
ValueRaider
97adb30d41 Merge pull request #1262 from ranaroussi/main
Sync `main` -> `dev`
2022-12-20 20:42:10 +00:00
ValueRaider
eacfbc45c0 Bump version to 0.2.3 2022-12-20 11:57:04 +00:00
ValueRaider
8deddd7ee9 Make financials API '_' use consistent 2022-12-20 11:56:57 +00:00
ValueRaider
beb494b67e README: add small section on version 0.2 2022-12-20 11:37:16 +00:00
ValueRaider
e2948a8b48 Bump version to 0.2.2 2022-12-20 11:33:04 +00:00
ValueRaider
ff3d3f2f78 Restore 'financials' attribute (map to 'income_stmt') 2022-12-20 11:32:19 +00:00
ValueRaider
85783da515 README: update 'repair' doc 2022-12-19 23:30:29 +00:00
ValueRaider
9dbfad4294 Bump version to 0.2.1 2022-12-19 23:19:42 +00:00
ValueRaider
5e54b92efd Fix _reconstruct_intervals_batch() calibration bug 2022-12-19 18:09:06 +00:00
ValueRaider
cffdbd47b5 Merge pull request #1253 from Rogach/pr/decode-stores
decode encrypted root.App.main.context.dispatcher.stores
2022-12-19 12:29:57 +00:00
ValueRaider
f398f46509 Switch 'pycryptodome' -> 'cryptography' 2022-12-19 12:28:51 +00:00
ValueRaider
097c76aa46 Add 'pycryptodome' requirement 2022-12-18 13:26:12 +00:00
ValueRaider
a9da16e048 Fix get_json_data_stores() behaviour 2022-12-18 13:19:11 +00:00
Platon Pronko
8e5f0984af decode encrypted root.App.main.context.dispatcher.stores 2022-12-18 11:40:26 +04:00
ValueRaider
38b738e766 Bump version to 0.2.0rc5 2022-12-16 16:27:46 +00:00
ValueRaider
55772d30a4 Merge pull request #1245 from ranaroussi/dev
Merge dev -> main for release 0.2.0rc5
2022-12-16 16:25:36 +00:00
ValueRaider
382285cfd9 Remove hardcoded paths 2022-12-16 16:24:16 +00:00
ValueRaider
d2e5ce284e Merge pull request #1243 from ranaroussi/fix/financials-error-handling
Improve financials error handling
2022-12-16 16:20:25 +00:00
ValueRaider
88d21d742d Merge pull request #1244 from ranaroussi/fix/repair-100x
Fix '100x price' repair
2022-12-16 16:20:17 +00:00
ValueRaider
7a0356d47b Document financials get() methods 2022-12-16 16:19:37 +00:00
ValueRaider
a13bf0cd6c Hide divide-by-0 warnings 2022-12-16 15:05:38 +00:00
ValueRaider
7cacf233ce Improve financials error handling
Nicely intercept parse errors in get_json_data_stores() & _create_financials_table_old() ; Improve exception messages ; Fix typo 'YFiance'
2022-12-16 13:22:17 +00:00
ValueRaider
b48212e420 Repair-100x now tolerates zeroes 2022-12-14 21:16:16 +00:00
ValueRaider
f10f9970b2 Bump version to 0.2.0rc4 2022-12-13 22:12:23 +00:00
ValueRaider
96ff214107 Fix tests 2022-12-13 21:45:28 +00:00
ValueRaider
e7bf3607e8 Fix tests 2022-12-13 21:41:46 +00:00
ValueRaider
2883362a0e Merge pull request #1238 from ranaroussi/dev
Merge dev -> main for release 0.2.0rc3 (or official?)
2022-12-13 21:22:43 +00:00
ValueRaider
df7af507f0 Merge pull request #1233 from ranaroussi/revise-reqs
Raise reqs min versions (lxml, pandas)
2022-12-13 18:12:48 +00:00
ValueRaider
46dbed3e7e Merge pull request #1235 from ymyke/feature/add-history-metadata
Add `history_metadata` property
2022-12-13 18:09:14 +00:00
ValueRaider
46d5579caa Merge pull request #1236 from ranaroussi/feature/improve-reconstruction
Improve price repair
2022-12-13 17:28:21 +00:00
ValueRaider
11a3a9d457 Raise min lxml & pandas, sync all reqs lists 2022-12-13 15:25:34 +00:00
ValueRaider
6dca1eea96 Don't repair prices if can't calibrate 2022-12-13 14:47:27 +00:00
ymyke
85ef53c6bb Store _history_metadata earlier and use that attribute for further metadata access in the same function 2022-12-13 08:27:12 +01:00
ValueRaider
4c41ba0a50 Improve price repair
Minimise _reconstruct_intervals() #requests ; Refine when to repair NaNs
2022-12-12 16:43:24 +00:00
ymyke
6f60a78262 Add history_metadata property
Including test and README mention.

See also https://github.com/ranaroussi/yfinance/issues/1195.
2022-12-12 17:16:05 +01:00
ValueRaider
8f083818c3 Merge pull request #1232 from ranaroussi/fix/no-history-caching
If fetching price history ending in future, don't use cache
2022-12-10 21:13:39 +00:00
ValueRaider
791c845d23 Merge pull request #1194 from ranaroussi/feature/old-financials-backup
Serve old financials when new financials are missing
2022-12-10 21:13:09 +00:00
ValueRaider
aeea23229f Merge branch 'dev' into feature/old-financials-backup 2022-12-10 21:12:06 +00:00
ValueRaider
e91ffe4844 Replace 'fallback' with 'legacy' arg 2022-12-10 21:05:42 +00:00
ValueRaider
df9d456cf6 Merge pull request #1221 from ranaroussi/feature/financials-format-default
Default enable 'pretty' financials, explain in README
2022-12-10 19:44:13 +00:00
ValueRaider
4c89e8aefa Account for data delay ; Remove debug code ; Fix session test 2022-12-10 18:27:23 +00:00
ValueRaider
7ddce7f80b Update issue template - add note on Yahoo spam 2022-12-08 13:57:21 +00:00
ValueRaider
b3dbbc46e2 If fetching price history ending in future, don't use cache 2022-12-06 18:04:30 +00:00
ValueRaider
762d446661 Default enable 'pretty' financials, explain in README 2022-12-01 18:49:43 +00:00
ValueRaider
1aa3c3d9a8 Merge pull request #1220 from ranaroussi/feature/improve-repair-zero
Improve handling dividends without matching price interval
2022-12-01 17:14:59 +00:00
ValueRaider
0f6ad3290d Merge pull request #1217 from ranaroussi/fix/Yahoo-duplication-fix
Extend Yahoo duplication fix to intra-day
2022-12-01 17:14:41 +00:00
ValueRaider
e26a4c5a1c Improve handling dividends without matching price interval
Tolerate merging daily dividend event without matching prices interval (just append).
Move price-repair to after merge, to fix these missing prices intervals.
Improve bad-price detection & repair.
2022-12-01 17:11:05 +00:00
ValueRaider
d963e3fe1c Fix dev merge ; Fix financials fallback fetch 2022-12-01 15:47:37 +00:00
ValueRaider
0cd54486d0 Merge pull request #1216 from ymyke/fix/readme-several
Fix a couple of minor issues in README
2022-11-30 22:35:47 +00:00
ValueRaider
f93c3d76ce Extend Yahoo duplication fix to intra-day 2022-11-30 17:05:22 +00:00
ValueRaider
8bf7576b33 Merge pull request #1215 from fredrik-corneliusson/dev_verify_ticker_history_call
Test to verify ticker history request.
2022-11-29 23:11:46 +00:00
ymyke
2eae33bd33 Fix a couple of minor issues in README
- Typos in variable name
- `Ticker` doesn't support several tickers
- `Tickers` doesn't return named tuple
- "1m" in `download` would produce an error for longer timeframes, so
  changing the example to "5d"
2022-11-29 23:28:16 +01:00
Fredrik Corneliusson
5e333f53ee #1213 Added test asserting no harmful requests are added to history call. 2022-11-29 01:18:59 +01:00
ValueRaider
9c249a100f Merge pull request #1203 from ranaroussi/fix/capital-gains-perf-regression
Get quote type from metadata instead info[] -> faster
2022-11-28 18:13:29 +00:00
ValueRaider
0ee3d6d72d Merge pull request #1208 from fredrik-corneliusson/mydev
#1207 Fixed regression issue with Python < 3.9
2022-11-27 19:23:33 +00:00
ValueRaider
3c218b81a3 Merge pull request #1210 from fredrik-corneliusson/mydev_1209
#1209 Fixed pretty format alters cached dataframe
2022-11-27 19:22:06 +00:00
ValueRaider
80dc0e8488 Merge branch 'dev' into feature/old-financials-backup 2022-11-27 19:19:03 +00:00
ValueRaider
4064ec53c3 Move financials fallback logic into Ticker 2022-11-27 19:15:35 +00:00
Fredrik Corneliusson
37ac9bd1d5 #1209 Fixed pretty format alters cached dataframe 2022-11-27 19:25:08 +01:00
Fredrik Corneliusson
e234b8c5ab #1207 Fixed regression issue with Python < 3.9 2022-11-27 19:00:45 +01:00
ValueRaider
efc56c43c2 Improve bug issue template - request version info 2022-11-27 12:50:56 +00:00
ValueRaider
50de008820 Merge pull request #1193 from ranaroussi/fix/financials-formatting
Fix financials formatting
2022-11-26 21:40:30 +00:00
ValueRaider
d7baa0713e Get quote type from metadata instead info[] -> faster 2022-11-25 22:18:09 +00:00
ValueRaider
3b19ef12bc camel2title(): restrict acceptable inputs 2022-11-24 20:36:00 +00:00
ValueRaider
dfb15e6778 Unit tests for financials formatting 2022-11-23 18:16:51 +00:00
ValueRaider
379b87d925 Moved financials formatting up into get()
Moved financials formatting up into get(), controlled by new 'pretty' argument. Extend camel2title() to accept different separator char and to preserve acronyms case e.g. 'EBIT'
2022-11-23 17:45:45 +00:00
ValueRaider
b856041b53 Merge pull request #1177 from ranaroussi/fix/dst-nonexistent
Fix localizing midnight when non-existent (DST) #1174
2022-11-22 22:19:40 +00:00
ValueRaider
b3b36c5cc9 Restore old financials as backup if new missing 2022-11-22 22:17:07 +00:00
ValueRaider
ab1476c0d1 Restore financials nesting code (commented) 2022-11-22 21:46:26 +00:00
ValueRaider
566a38b432 Fix financials index formatting 2022-11-22 21:46:04 +00:00
ValueRaider
744e70ffff Add issue template for 'feature request' 2022-11-19 13:46:06 +00:00
ValueRaider
2970d9460f Fix localizing midnight when non-existent (DST) #1174 2022-11-16 12:34:36 +00:00
ValueRaider
55fd565ef0 Update bug_report.md - ask 'Does Yahoo have data?' 2022-11-14 20:45:30 +00:00
ValueRaider
b67372e4eb Version 0.2.0rc2 2022-11-12 21:28:22 +00:00
ValueRaider
77107c6ea0 Merge pull request #1168 from ranaroussi/dev
Merge dev -> main for release 0.2.0rc2
2022-11-12 21:20:34 +00:00
20 changed files with 1559 additions and 402 deletions

View File

@@ -7,14 +7,36 @@ assignees: ''
---
*** READ BEFORE POSTING ***
# READ BEFORE POSTING
Before posting an issue - please upgrade to the latest version and confirm the issue/bug is still there.
### Are you up-to-date?
Upgrade to the latest version and confirm the issue/bug is still there.
Upgrade using:
`$ pip install yfinance --upgrade --no-cache-dir`
Bug still there? Delete this content and submit your bug report here and provide the following, as best you can:
Confirm by running:
`import yfinance as yf ; print(yf.__version__)`
and comparing against [PIP](https://pypi.org/project/yfinance/#history).
### Does Yahoo actually have the data?
Visit `finance.yahoo.com` and confirm they have your data. Maybe your ticker was delisted.
Then check that you are spelling the ticker *exactly* the same as Yahoo.
### Are you spamming Yahoo?
Yahoo Finance's free service has a limit on query rate (roughly 100/s). Their delaying or blocking of your spam is not a bug.
### Still think it's a bug?
Delete this default message and submit your bug report here, providing the following as best you can:
- Info about your system:
- yfinance version
- operating system
- Simple code that reproduces your problem
- The error message

View File

@@ -0,0 +1,14 @@
---
name: Feature request
about: Request a new feature
title: ''
labels: ''
assignees: ''
---
**Describe the problem**
**Describe the solution**
**Additional context**

View File

@@ -1,8 +1,46 @@
Change Log
===========
0.2.3
-----
- Make financials API '_' use consistent
0.2.2
-----
- Restore 'financials' attribute (map to 'income_stmt')
0.2.1
-----
Release!
0.2.0rc5
--------
- Improve financials error handling #1243
- Fix '100x price' repair #1244
0.2.0rc4
--------
- Access to old financials tables via `get_income_stmt(legacy=True)`
- Optimise scraping financials & fundamentals, 2x faster
- Add 'capital gains' alongside dividends & splits for ETFs, and metadata available via `history_metadata`, plus a bunch of price fixes
For full list of changes see #1238
0.2.0rc2
--------
Financials
- fix financials tables to match website #1128 #1157
- lru_cache to optimise web requests #1147
Prices
- improve price repair #1148
- fix merging dividends/splits with day/week/monthly prices #1161
- fix the Yahoo DST fixes #1143
- improve bad/delisted ticker handling #1140
Misc
- fix 'trailingPegRatio' #1138
- improve error handling #1118
0.2.0rc1
------
--------
Jumping to 0.2 for this big update. 0.1.* will continue to receive bug-fixes
- timezone cache performance massively improved. Thanks @fredrik-corneliusson #1113 #1112 #1109 #1105 #1099
- price repair feature #1110

View File

@@ -42,6 +42,13 @@ Yahoo! finance API is intended for personal use only.**
---
## What's new in version 0.2
- Optimised web scraping
- All 3 financials tables now match website so expect keys to change. If you really want old tables, use [`Ticker.get_[income_stmt|balance_sheet|cashflow](legacy=True, ...)`](https://github.com/ranaroussi/yfinance/blob/85783da515761a145411d742c2a8a3c1517264b0/yfinance/base.py#L968)
- price data improvements: fix bug NaN rows with dividend; new repair feature for missing or 100x prices `download(repair=True)`; new attribute `Ticker.history_metadata`
[See release notes for full list of changes](https://github.com/ranaroussi/yfinance/releases/tag/0.2.1)
## Quick Start
### The Ticker module
@@ -56,9 +63,16 @@ msft = yf.Ticker("MSFT")
# get stock info
msft.info
# get stock price statistics
msft.stats
msft.valuations
# get historical market data
hist = msft.history(period="max")
# show meta information about the history (requires history() to be called first)
msft.history_metadata
# show actions (dividends, splits, capital gains)
msft.actions
@@ -74,18 +88,19 @@ msft.capital_gains
# show share count
msft.shares
msft.get_shares_full()
# show income statement
# show financials:
# - income statement
msft.income_stmt
msft.quarterly_income_stmt
# show balance sheet
# - balance sheet
msft.balance_sheet
msft.quarterly_balance_sheet
# show cash flow statement
# - cash flow statement
msft.cashflow
msft.quarterly_cashflow
# see `Ticker.get_income_stmt()` for more options
# show major holders
msft.major_holders
@@ -108,9 +123,9 @@ msft.recommendations
msft.recommendations_summary
# show analysts other work
msft.analyst_price_target
mfst.revenue_forecasts
mfst.earnings_forecasts
mfst.earnings_trend
msft.revenue_forecasts
msft.earnings_forecasts
msft.earnings_trend
# show next event (earnings, etc)
msft.calendar
@@ -160,7 +175,7 @@ the Ticker constructor.
import requests_cache
session = requests_cache.CachedSession('yfinance.cache')
session.headers['User-agent'] = 'my-program/1.0'
ticker = yf.Ticker('msft aapl goog', session=session)
ticker = yf.Ticker('msft', session=session)
# The scraped response will be stored in the cache
ticker.actions
```
@@ -171,7 +186,6 @@ To initialize multiple `Ticker` objects, use
import yfinance as yf
tickers = yf.Tickers('msft aapl goog')
# ^ returns a named tuple of Ticker objects
# access each ticker using (example)
tickers.tickers['MSFT'].info
@@ -201,11 +215,10 @@ data = yf.download( # or pdr.get_data_yahoo(...
# fetch data by interval (including intraday if period < 60 days)
# valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
# (optional, default is '1d')
interval = "1m",
interval = "5d",
# Whether to ignore timezone when aligning ticker data from
# different timezones. Default is True. False may be useful for
# minute/hourly data.
# different timezones. Default is False.
ignore_tz = False,
# group by ticker (to access via data['SPY'])
@@ -216,7 +229,7 @@ data = yf.download( # or pdr.get_data_yahoo(...
# (optional, default is False)
auto_adjust = True,
# identify and attempt repair of currency unit mixups e.g. $/cents
# attempt repair of missing data or currency mixups e.g. $/cents
repair = False,
# download pre/post regular market hours data
@@ -295,12 +308,16 @@ To install `yfinance` using `conda`, see
### Requirements
- [Python](https://www.python.org) \>= 2.7, 3.4+
- [Pandas](https://github.com/pydata/pandas) (tested to work with
\>=0.23.1)
- [Numpy](http://www.numpy.org) \>= 1.11.1
- [requests](http://docs.python-requests.org/en/master/) \>= 2.14.2
- [lxml](https://pypi.org/project/lxml/) \>= 4.5.1
- [appdirs](https://pypi.org/project/appdirs) \>=1.4.4
- [Pandas](https://github.com/pydata/pandas) \>= 1.3.0
- [Numpy](http://www.numpy.org) \>= 1.16.5
- [requests](http://docs.python-requests.org/en/master) \>= 2.26
- [lxml](https://pypi.org/project/lxml) \>= 4.9.1
- [appdirs](https://pypi.org/project/appdirs) \>= 1.4.4
- [pytz](https://pypi.org/project/pytz) \>=2022.5
- [frozendict](https://pypi.org/project/frozendict) \>= 2.3.4
- [beautifulsoup4](https://pypi.org/project/beautifulsoup4) \>= 4.11.1
- [html5lib](https://pypi.org/project/html5lib) \>= 1.1
- [cryptography](https://pypi.org/project/cryptography) \>= 3.3.2
### Optional (if you want to use `pandas_datareader`)

View File

@@ -1,5 +1,5 @@
{% set name = "yfinance" %}
{% set version = "0.1.58" %}
{% set version = "0.2.3" %}
package:
name: "{{ name|lower }}"
@@ -16,22 +16,34 @@ build:
requirements:
host:
- pandas >=0.24.0
- pandas >=1.3.0
- numpy >=1.16.5
- requests >=2.21
- requests >=2.26
- multitasking >=0.0.7
- lxml >=4.5.1
- appdirs >= 1.4.4
- lxml >=4.9.1
- appdirs >=1.4.4
- pytz >=2022.5
- frozendict >=2.3.4
- beautifulsoup4 >=4.11.1
- html5lib >=1.1
# - pycryptodome >=3.6.6
- cryptography >=3.3.2
- pip
- python
run:
- pandas >=0.24.0
- pandas >=1.3.0
- numpy >=1.16.5
- requests >=2.21
- requests >=2.26
- multitasking >=0.0.7
- lxml >=4.5.1
- appdirs >= 1.4.4
- lxml >=4.9.1
- appdirs >=1.4.4
- pytz >=2022.5
- frozendict >=2.3.4
- beautifulsoup4 >=4.11.1
- html5lib >=1.1
# - pycryptodome >=3.6.6
- cryptography >=3.3.2
- python
test:

View File

@@ -1,10 +1,11 @@
pandas>=1.1.0
pandas>=1.3.0
numpy>=1.16.5
requests>=2.26
multitasking>=0.0.7
lxml>=4.5.1
lxml>=4.9.1
appdirs>=1.4.4
pytz>=2022.5
frozendict>=2.3.4
beautifulsoup4>=4.11.1
html5lib>=1.1
cryptography>=3.3.2

View File

@@ -59,10 +59,12 @@ setup(
platforms=['any'],
keywords='pandas, yahoo finance, pandas datareader',
packages=find_packages(exclude=['contrib', 'docs', 'tests', 'examples']),
install_requires=['pandas>=1.1.0', 'numpy>=1.15',
install_requires=['pandas>=1.3.0', 'numpy>=1.16.5',
'requests>=2.26', 'multitasking>=0.0.7',
'lxml>=4.5.1', 'appdirs>=1.4.4', 'pytz>=2022.5',
'frozendict>=2.3.4',
'lxml>=4.9.1', 'appdirs>=1.4.4', 'pytz>=2022.5',
'frozendict>=2.3.4',
# 'pycryptodome>=3.6.6',
'cryptography>=3.3.2',
'beautifulsoup4>=4.11.1', 'html5lib>=1.1'],
entry_points={
'console_scripts': [

View File

@@ -36,6 +36,26 @@ class TestPriceHistory(unittest.TestCase):
f = df.index.time == _dt.time(0)
self.assertTrue(f.all())
def test_duplicatingHourly(self):
tkrs = ["IMP.JO", "BHG.JO", "SSW.JO", "BP.L", "INTC"]
for tkr in tkrs:
dat = yf.Ticker(tkr, session=self.session)
tz = dat._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
dt_utc = _tz.timezone("UTC").localize(_dt.datetime.utcnow())
dt = dt_utc.astimezone(_tz.timezone(tz))
df = dat.history(start=dt.date() - _dt.timedelta(days=1), interval="1h")
dt0 = df.index[-2]
dt1 = df.index[-1]
try:
self.assertNotEqual(dt0.hour, dt1.hour)
except:
print("Ticker = ", tkr)
raise
def test_duplicatingDaily(self):
tkrs = ["IMP.JO", "BHG.JO", "SSW.JO", "BP.L", "INTC"]
test_run = False
@@ -250,11 +270,7 @@ class TestPriceHistory(unittest.TestCase):
df = dat.history(start=start, interval="1wk")
self.assertTrue((df.index.weekday == 0).all())
def test_repair_weekly_100x(self):
# Sometimes, Yahoo returns prices 100x the correct value.
# Suspect mixup between £/pence or $/cents etc.
# E.g. ticker PNL.L
def test_repair_100x_weekly(self):
# Setup:
tkr = "PNL.L"
dat = yf.Ticker(tkr, session=self.session)
@@ -271,6 +287,7 @@ class TestPriceHistory(unittest.TestCase):
_dt.date(2022, 10, 16),
_dt.date(2022, 10, 9),
_dt.date(2022, 10, 2)]))
df = df.sort_index()
df.index.name = "Date"
df_bad = df.copy()
df_bad.loc["2022-10-23", "Close"] *= 100
@@ -285,7 +302,13 @@ class TestPriceHistory(unittest.TestCase):
# First test - no errors left
for c in data_cols:
self.assertTrue(_np.isclose(df_repaired[c], df[c], rtol=1e-2).all())
try:
self.assertTrue(_np.isclose(df_repaired[c], df[c], rtol=1e-2).all())
except:
print(df[c])
print(df_repaired[c])
raise
# Second test - all differences should be either ~1x or ~100x
ratio = df_bad[data_cols].values / df[data_cols].values
@@ -298,11 +321,7 @@ class TestPriceHistory(unittest.TestCase):
f_1 = ratio == 1
self.assertTrue((f_100 | f_1).all())
def test_repair_weekly_preSplit_100x(self):
# Sometimes, Yahoo returns prices 100x the correct value.
# Suspect mixup between £/pence or $/cents etc.
# E.g. ticker PNL.L
def test_repair_100x_weekly_preSplit(self):
# PNL.L has a stock-split in 2022. Sometimes requesting data before 2022 is not split-adjusted.
tkr = "PNL.L"
@@ -320,6 +339,7 @@ class TestPriceHistory(unittest.TestCase):
_dt.date(2020, 3, 23),
_dt.date(2020, 3, 16),
_dt.date(2020, 3, 9)]))
df = df.sort_index()
# Simulate data missing split-adjustment:
df[data_cols] *= 100.0
df["Volume"] *= 0.01
@@ -358,11 +378,7 @@ class TestPriceHistory(unittest.TestCase):
f_1 = ratio == 1
self.assertTrue((f_100 | f_1).all())
def test_repair_daily_100x(self):
# Sometimes, Yahoo returns prices 100x the correct value.
# Suspect mixup between £/pence or $/cents etc.
# E.g. ticker PNL.L
def test_repair_100x_daily(self):
tkr = "PNL.L"
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.info["exchangeTimezoneName"]
@@ -378,6 +394,7 @@ class TestPriceHistory(unittest.TestCase):
_dt.date(2022, 10, 31),
_dt.date(2022, 10, 28),
_dt.date(2022, 10, 27)]))
df = df.sort_index()
df.index.name = "Date"
df_bad = df.copy()
df_bad.loc["2022-11-01", "Close"] *= 100
@@ -403,10 +420,7 @@ class TestPriceHistory(unittest.TestCase):
f_1 = ratio == 1
self.assertTrue((f_100 | f_1).all())
def test_repair_daily_zeroes(self):
# Sometimes Yahoo returns price=0.0 when price obviously not zero
# E.g. ticker BBIL.L
def test_repair_zeroes_daily(self):
tkr = "BBIL.L"
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.info["exchangeTimezoneName"]
@@ -420,18 +434,59 @@ class TestPriceHistory(unittest.TestCase):
index=_pd.to_datetime([_dt.datetime(2022, 11, 1),
_dt.datetime(2022, 10, 31),
_dt.datetime(2022, 10, 30)]))
df_bad = df_bad.sort_index()
df_bad.index.name = "Date"
df_bad.index = df_bad.index.tz_localize(tz_exchange)
repaired_df = dat._fix_zero_prices(df_bad, "1d", tz_exchange)
repaired_df = dat._fix_zeroes(df_bad, "1d", tz_exchange)
correct_df = df_bad.copy()
correct_df.loc[correct_df.index[0], "Open"] = 102.080002
correct_df.loc[correct_df.index[0], "Low"] = 102.032501
correct_df.loc[correct_df.index[0], "High"] = 102.080002
correct_df.loc["2022-11-01", "Open"] = 102.080002
correct_df.loc["2022-11-01", "Low"] = 102.032501
correct_df.loc["2022-11-01", "High"] = 102.080002
for c in ["Open", "Low", "High", "Close"]:
self.assertTrue(_np.isclose(repaired_df[c], correct_df[c], rtol=1e-8).all())
def test_repair_zeroes_hourly(self):
tkr = "INTC"
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.info["exchangeTimezoneName"]
df_bad = _pd.DataFrame(data={"Open": [29.68, 29.49, 29.545, _np.nan, 29.485],
"High": [29.68, 29.625, 29.58, _np.nan, 29.49],
"Low": [29.46, 29.4, 29.45, _np.nan, 29.31],
"Close": [29.485, 29.545, 29.485, _np.nan, 29.325],
"Adj Close": [29.485, 29.545, 29.485, _np.nan, 29.325],
"Volume": [3258528, 2140195, 1621010, 0, 0]},
index=_pd.to_datetime([_dt.datetime(2022,11,25, 9,30),
_dt.datetime(2022,11,25, 10,30),
_dt.datetime(2022,11,25, 11,30),
_dt.datetime(2022,11,25, 12,30),
_dt.datetime(2022,11,25, 13,00)]))
df_bad = df_bad.sort_index()
df_bad.index.name = "Date"
df_bad.index = df_bad.index.tz_localize(tz_exchange)
repaired_df = dat._fix_zeroes(df_bad, "1h", tz_exchange)
correct_df = df_bad.copy()
idx = _pd.Timestamp(2022,11,25, 12,30).tz_localize(tz_exchange)
correct_df.loc[idx, "Open"] = 29.485001
correct_df.loc[idx, "High"] = 29.49
correct_df.loc[idx, "Low"] = 29.43
correct_df.loc[idx, "Close"] = 29.455
correct_df.loc[idx, "Adj Close"] = 29.455
correct_df.loc[idx, "Volume"] = 609164
for c in ["Open", "Low", "High", "Close"]:
try:
self.assertTrue(_np.isclose(repaired_df[c], correct_df[c], rtol=1e-7).all())
except:
print("COLUMN", c)
print(repaired_df)
print(correct_df[c])
print(repaired_df[c] - correct_df[c])
raise
if __name__ == '__main__':
unittest.main()

View File

@@ -65,6 +65,7 @@ class TestTicker(unittest.TestCase):
dat.splits
dat.actions
dat.shares
dat.get_shares_full()
dat.info
dat.calendar
dat.recommendations
@@ -100,6 +101,7 @@ class TestTicker(unittest.TestCase):
dat.splits
dat.actions
dat.shares
dat.get_shares_full()
dat.info
dat.calendar
dat.recommendations
@@ -128,13 +130,48 @@ class TestTicker(unittest.TestCase):
class TestTickerHistory(unittest.TestCase):
session = None
@classmethod
def setUpClass(cls):
cls.session = requests_cache.CachedSession(backend='memory')
@classmethod
def tearDownClass(cls):
if cls.session is not None:
cls.session.close()
def setUp(self):
# use a ticker that has dividends
self.ticker = yf.Ticker("IBM")
self.ticker = yf.Ticker("IBM", session=self.session)
def tearDown(self):
self.ticker = None
def test_history(self):
with self.assertRaises(RuntimeError):
self.ticker.history_metadata
data = self.ticker.history("1y")
self.assertIn("IBM", self.ticker.history_metadata.values(), "metadata missing")
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
def test_no_expensive_calls_introduced(self):
"""
Make sure calling history to get price data has not introduced more calls to yahoo than absolutely necessary.
As doing other type of scraping calls than "query2.finance.yahoo.com/v8/finance/chart" to yahoo website
will quickly trigger spam-block when doing bulk download of history data.
"""
session = requests_cache.CachedSession(backend='memory')
ticker = yf.Ticker("GOOGL", session=session)
ticker.history("1y")
actual_urls_called = tuple([r.url for r in session.cache.filter()])
session.close()
expected_urls = (
'https://query2.finance.yahoo.com/v8/finance/chart/GOOGL?range=1y&interval=1d&includePrePost=False&events=div%2Csplits%2CcapitalGains',
)
self.assertEqual(expected_urls, actual_urls_called, "Different than expected url used to fetch history.")
def test_dividends(self):
data = self.ticker.dividends
self.assertIsInstance(data, pd.Series, "data has wrong type")
@@ -152,9 +189,19 @@ class TestTickerHistory(unittest.TestCase):
class TestTickerEarnings(unittest.TestCase):
session = None
@classmethod
def setUpClass(cls):
cls.session = requests_cache.CachedSession(backend='memory')
@classmethod
def tearDownClass(cls):
if cls.session is not None:
cls.session.close()
def setUp(self):
self.ticker = yf.Ticker("GOOGL")
self.ticker = yf.Ticker("GOOGL", session=self.session)
def tearDown(self):
self.ticker = None
@@ -213,9 +260,19 @@ class TestTickerEarnings(unittest.TestCase):
class TestTickerHolders(unittest.TestCase):
session = None
@classmethod
def setUpClass(cls):
cls.session = requests_cache.CachedSession(backend='memory')
@classmethod
def tearDownClass(cls):
if cls.session is not None:
cls.session.close()
def setUp(self):
self.ticker = yf.Ticker("GOOGL")
self.ticker = yf.Ticker("GOOGL", session=self.session)
def tearDown(self):
self.ticker = None
@@ -246,72 +303,291 @@ class TestTickerHolders(unittest.TestCase):
class TestTickerMiscFinancials(unittest.TestCase):
session = None
@classmethod
def setUpClass(cls):
cls.session = requests_cache.CachedSession(backend='memory')
@classmethod
def tearDownClass(cls):
if cls.session is not None:
cls.session.close()
def setUp(self):
self.ticker = yf.Ticker("GOOGL")
self.ticker = yf.Ticker("GOOGL", session=self.session)
# For ticker 'BSE.AX' (and others), Yahoo not returning
# full quarterly financials (usually cash-flow) with all entries,
# instead returns a smaller version in different data store.
self.ticker_old_fmt = yf.Ticker("BSE.AX", session=self.session)
def tearDown(self):
self.ticker = None
def test_income_statement(self):
expected_row = "TotalRevenue"
data = self.ticker.income_stmt
expected_keys = ["Total Revenue", "Basic EPS"]
expected_periods_days = 365
# Test contents of table
data = self.ticker.get_income_stmt(pretty=True)
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
self.assertIn(expected_row, data.index, "Did not find expected row in index")
for k in expected_keys:
self.assertIn(k, data.index, "Did not find expected row in index")
period = abs((data.columns[0]-data.columns[1]).days)
self.assertLess(abs(period-expected_periods_days), 20, "Not returning annual financials")
# Test property defaults
data2 = self.ticker.income_stmt
self.assertTrue(data.equals(data2), "property not defaulting to 'pretty=True'")
# Test pretty=False
expected_keys = [k.replace(' ', '') for k in expected_keys]
data = self.ticker.get_income_stmt(pretty=False)
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
for k in expected_keys:
self.assertIn(k, data.index, "Did not find expected row in index")
# Test to_dict
data = self.ticker.get_income_stmt(as_dict=True)
self.assertIsInstance(data, dict, "data has wrong type")
data_cached = self.ticker.income_stmt
self.assertIs(data, data_cached, "data not cached")
def test_quarterly_income_statement(self):
expected_keys = ["Total Revenue", "Basic EPS"]
expected_periods_days = 365//4
# Test contents of table
data = self.ticker.get_income_stmt(pretty=True, freq="quarterly")
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
for k in expected_keys:
self.assertIn(k, data.index, "Did not find expected row in index")
period = abs((data.columns[0]-data.columns[1]).days)
self.assertLess(abs(period-expected_periods_days), 20, "Not returning quarterly financials")
# Test property defaults
data2 = self.ticker.quarterly_income_stmt
self.assertTrue(data.equals(data2), "property not defaulting to 'pretty=True'")
# Test pretty=False
expected_keys = [k.replace(' ', '') for k in expected_keys]
data = self.ticker.get_income_stmt(pretty=False, freq="quarterly")
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
for k in expected_keys:
self.assertIn(k, data.index, "Did not find expected row in index")
# Test to_dict
data = self.ticker.get_income_stmt(as_dict=True)
self.assertIsInstance(data, dict, "data has wrong type")
def test_quarterly_income_statement_old_fmt(self):
expected_row = "TotalRevenue"
data = self.ticker.quarterly_income_stmt
data = self.ticker_old_fmt.get_income_stmt(freq="quarterly", legacy=True)
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
self.assertIn(expected_row, data.index, "Did not find expected row in index")
data_cached = self.ticker.quarterly_income_stmt
data_cached = self.ticker_old_fmt.get_income_stmt(freq="quarterly", legacy=True)
self.assertIs(data, data_cached, "data not cached")
def test_balance_sheet(self):
expected_row = "TotalAssets"
data = self.ticker.balance_sheet
expected_keys = ["Total Assets", "Net PPE"]
expected_periods_days = 365
# Test contents of table
data = self.ticker.get_balance_sheet(pretty=True)
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
self.assertIn(expected_row, data.index, "Did not find expected row in index")
for k in expected_keys:
self.assertIn(k, data.index, "Did not find expected row in index")
period = abs((data.columns[0]-data.columns[1]).days)
self.assertLess(abs(period-expected_periods_days), 20, "Not returning annual financials")
data_cached = self.ticker.balance_sheet
self.assertIs(data, data_cached, "data not cached")
# Test property defaults
data2 = self.ticker.balance_sheet
self.assertTrue(data.equals(data2), "property not defaulting to 'pretty=True'")
# Test pretty=False
expected_keys = [k.replace(' ', '') for k in expected_keys]
data = self.ticker.get_balance_sheet(pretty=False)
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
for k in expected_keys:
self.assertIn(k, data.index, "Did not find expected row in index")
# Test to_dict
data = self.ticker.get_balance_sheet(as_dict=True)
self.assertIsInstance(data, dict, "data has wrong type")
def test_quarterly_balance_sheet(self):
expected_keys = ["Total Assets", "Net PPE"]
expected_periods_days = 365//4
# Test contents of table
data = self.ticker.get_balance_sheet(pretty=True, freq="quarterly")
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
for k in expected_keys:
self.assertIn(k, data.index, "Did not find expected row in index")
period = abs((data.columns[0]-data.columns[1]).days)
self.assertLess(abs(period-expected_periods_days), 20, "Not returning quarterly financials")
# Test property defaults
data2 = self.ticker.quarterly_balance_sheet
self.assertTrue(data.equals(data2), "property not defaulting to 'pretty=True'")
# Test pretty=False
expected_keys = [k.replace(' ', '') for k in expected_keys]
data = self.ticker.get_balance_sheet(pretty=False, freq="quarterly")
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
for k in expected_keys:
self.assertIn(k, data.index, "Did not find expected row in index")
# Test to_dict
data = self.ticker.get_balance_sheet(as_dict=True, freq="quarterly")
self.assertIsInstance(data, dict, "data has wrong type")
def test_quarterly_balance_sheet_old_fmt(self):
expected_row = "TotalAssets"
data = self.ticker.quarterly_balance_sheet
data = self.ticker_old_fmt.get_balance_sheet(freq="quarterly", legacy=True)
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
self.assertIn(expected_row, data.index, "Did not find expected row in index")
data_cached = self.ticker.quarterly_balance_sheet
data_cached = self.ticker_old_fmt.get_balance_sheet(freq="quarterly", legacy=True)
self.assertIs(data, data_cached, "data not cached")
def test_cashflow(self):
expected_row = "OperatingCashFlow"
data = self.ticker.cashflow
def test_cash_flow(self):
expected_keys = ["Operating Cash Flow", "Net PPE Purchase And Sale"]
expected_periods_days = 365
# Test contents of table
data = self.ticker.get_cashflow(pretty=True)
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
for k in expected_keys:
self.assertIn(k, data.index, "Did not find expected row in index")
period = abs((data.columns[0]-data.columns[1]).days)
self.assertLess(abs(period-expected_periods_days), 20, "Not returning annual financials")
# Test property defaults
data2 = self.ticker.cashflow
self.assertTrue(data.equals(data2), "property not defaulting to 'pretty=True'")
# Test pretty=False
expected_keys = [k.replace(' ', '') for k in expected_keys]
data = self.ticker.get_cashflow(pretty=False)
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
for k in expected_keys:
self.assertIn(k, data.index, "Did not find expected row in index")
# Test to_dict
data = self.ticker.get_cashflow(as_dict=True)
self.assertIsInstance(data, dict, "data has wrong type")
def test_quarterly_cash_flow(self):
expected_keys = ["Operating Cash Flow", "Net PPE Purchase And Sale"]
expected_periods_days = 365//4
# Test contents of table
data = self.ticker.get_cashflow(pretty=True, freq="quarterly")
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
for k in expected_keys:
self.assertIn(k, data.index, "Did not find expected row in index")
period = abs((data.columns[0]-data.columns[1]).days)
self.assertLess(abs(period-expected_periods_days), 20, "Not returning quarterly financials")
# Test property defaults
data2 = self.ticker.quarterly_cashflow
self.assertTrue(data.equals(data2), "property not defaulting to 'pretty=True'")
# Test pretty=False
expected_keys = [k.replace(' ', '') for k in expected_keys]
data = self.ticker.get_cashflow(pretty=False, freq="quarterly")
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
for k in expected_keys:
self.assertIn(k, data.index, "Did not find expected row in index")
# Test to_dict
data = self.ticker.get_cashflow(as_dict=True)
self.assertIsInstance(data, dict, "data has wrong type")
def test_quarterly_cashflow_old_fmt(self):
expected_row = "NetIncome"
data = self.ticker_old_fmt.get_cashflow(legacy=True, freq="quarterly")
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
self.assertIn(expected_row, data.index, "Did not find expected row in index")
data_cached = self.ticker.cashflow
data_cached = self.ticker_old_fmt.get_cashflow(legacy=True, freq="quarterly")
self.assertIs(data, data_cached, "data not cached")
def test_quarterly_cashflow(self):
expected_row = "OperatingCashFlow"
data = self.ticker.quarterly_cashflow
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
self.assertIn(expected_row, data.index, "Did not find expected row in index")
def test_income_alt_names(self):
i1 = self.ticker.income_stmt
i2 = self.ticker.incomestmt
self.assertTrue(i1.equals(i2))
i3 = self.ticker.financials
self.assertTrue(i1.equals(i3))
data_cached = self.ticker.quarterly_cashflow
self.assertIs(data, data_cached, "data not cached")
i1 = self.ticker.get_income_stmt()
i2 = self.ticker.get_incomestmt()
self.assertTrue(i1.equals(i2))
i3 = self.ticker.get_financials()
self.assertTrue(i1.equals(i3))
i1 = self.ticker.quarterly_income_stmt
i2 = self.ticker.quarterly_incomestmt
self.assertTrue(i1.equals(i2))
i3 = self.ticker.quarterly_financials
self.assertTrue(i1.equals(i3))
i1 = self.ticker.get_income_stmt(freq="quarterly")
i2 = self.ticker.get_incomestmt(freq="quarterly")
self.assertTrue(i1.equals(i2))
i3 = self.ticker.get_financials(freq="quarterly")
self.assertTrue(i1.equals(i3))
def test_balance_sheet_alt_names(self):
i1 = self.ticker.balance_sheet
i2 = self.ticker.balancesheet
self.assertTrue(i1.equals(i2))
i1 = self.ticker.get_balance_sheet()
i2 = self.ticker.get_balancesheet()
self.assertTrue(i1.equals(i2))
i1 = self.ticker.quarterly_balance_sheet
i2 = self.ticker.quarterly_balancesheet
self.assertTrue(i1.equals(i2))
i1 = self.ticker.get_balance_sheet(freq="quarterly")
i2 = self.ticker.get_balancesheet(freq="quarterly")
self.assertTrue(i1.equals(i2))
def test_cash_flow_alt_names(self):
i1 = self.ticker.cash_flow
i2 = self.ticker.cashflow
self.assertTrue(i1.equals(i2))
i1 = self.ticker.get_cash_flow()
i2 = self.ticker.get_cashflow()
self.assertTrue(i1.equals(i2))
i1 = self.ticker.quarterly_cash_flow
i2 = self.ticker.quarterly_cashflow
self.assertTrue(i1.equals(i2))
i1 = self.ticker.get_cash_flow(freq="quarterly")
i2 = self.ticker.get_cashflow(freq="quarterly")
self.assertTrue(i1.equals(i2))
def test_sustainability(self):
data = self.ticker.sustainability
@@ -379,6 +655,11 @@ class TestTickerMiscFinancials(unittest.TestCase):
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
def test_shares_full(self):
data = self.ticker.get_shares_full()
self.assertIsInstance(data, pd.Series, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
def test_info(self):
data = self.ticker.info
self.assertIsInstance(data, dict, "data has wrong type")

View File

@@ -40,6 +40,8 @@ from .scrapers.analysis import Analysis
from .scrapers.fundamentals import Fundamentals
from .scrapers.holders import Holders
from .scrapers.quote import Quote
from .scrapers.stats import KeyStats
import json as _json
_BASE_URL_ = 'https://query2.finance.yahoo.com'
_SCRAPE_URL_ = 'https://finance.yahoo.com/quote'
@@ -51,6 +53,7 @@ class TickerBase:
self.ticker = ticker.upper()
self.session = session
self._history = None
self._history_metadata = None
self._base_url = _BASE_URL_
self._scrape_url = _SCRAPE_URL_
self._tz = None
@@ -73,15 +76,9 @@ class TickerBase:
self._analysis = Analysis(self._data)
self._holders = Holders(self._data)
self._quote = Quote(self._data)
self._stats = KeyStats(self._data)
self._fundamentals = Fundamentals(self._data)
def stats(self, proxy=None):
ticker_url = "{}/{}".format(self._scrape_url, self.ticker)
# get info and sustainability
data = self._data.get_json_data_stores(proxy=proxy)["QuoteSummaryStore"]
return data
def history(self, period="1mo", interval="1d",
start=None, end=None, prepost=False, actions=True,
auto_adjust=True, back_adjust=False, repair=False, keepna=False,
@@ -177,11 +174,7 @@ class TickerBase:
proxy = {"https": proxy}
#if the ticker is MUTUALFUND or ETF, then get capitalGains events
data = self.get_info(proxy)
if not data is None and 'quoteType' in data and data['quoteType'] in ('MUTUALFUND', 'ETF'):
params["events"] = "div,splits,capitalGains"
else:
params["events"] = "div,splits"
params["events"] = "div,splits,capitalGains"
# Getting data from json
url = "{}/v8/finance/chart/{}".format(self._base_url, self.ticker)
@@ -189,7 +182,15 @@ class TickerBase:
data = None
try:
data = self._data.get(
get_fn = self._data.get
if end is not None:
end_dt = _pd.Timestamp(end, unit='s').tz_localize("UTC")
dt_now = end_dt.tzinfo.localize(_datetime.datetime.utcnow())
data_delay = _datetime.timedelta(minutes=30)
if end_dt+data_delay <= dt_now:
# Date range in past so safe to fetch through cache:
get_fn = self._data.cache_get
data = get_fn(
url=url,
params=params,
timeout=timeout
@@ -203,6 +204,12 @@ class TickerBase:
except Exception:
pass
# Store the meta data that gets retrieved simultaneously
try:
self._history_metadata = data["chart"]["result"][0]["meta"]
except Exception:
self._history_metadata = {}
err_msg = "No data found for this date range, symbol may be delisted"
fail = False
if data is None or not type(data) is dict:
@@ -216,9 +223,9 @@ class TickerBase:
elif "chart" not in data or data["chart"]["result"] is None or not data["chart"]["result"]:
fail = True
elif period is not None and "timestamp" not in data["chart"]["result"][0] and period not in \
data["chart"]["result"][0]["meta"]["validRanges"]:
self._history_metadata["validRanges"]:
# User provided a bad period. The minimum should be '1d', but sometimes Yahoo accepts '1h'.
err_msg = "Period '{}' is invalid, must be one of {}".format(period, data["chart"]["result"][0]["meta"][
err_msg = "Period '{}' is invalid, must be one of {}".format(period, self._history_metadata[
"validRanges"])
fail = True
if fail:
@@ -230,7 +237,7 @@ class TickerBase:
else:
print('%s: %s' % (self.ticker, err_msg))
return utils.empty_df()
# parse quotes
try:
quotes = utils.parse_quotes(data["chart"]["result"][0])
@@ -269,50 +276,28 @@ class TickerBase:
except Exception:
pass
tz_exchange = data["chart"]["result"][0]["meta"]["exchangeTimezoneName"]
# Select useful info from metadata
quote_type = self._history_metadata["instrumentType"]
expect_capital_gains = quote_type in ('MUTUALFUND', 'ETF')
tz_exchange = self._history_metadata["exchangeTimezoneName"]
# Note: ordering is important. If you change order, run the tests!
quotes = utils.set_df_tz(quotes, params["interval"], tz_exchange)
quotes = utils.fix_Yahoo_dst_issue(quotes, params["interval"])
quotes = utils.fix_Yahoo_returning_live_separate(quotes, params["interval"], tz_exchange)
if repair:
# Do this before auto/back adjust
quotes = self._fix_zero_prices(quotes, interval, tz_exchange)
quotes = self._fix_unit_mixups(quotes, interval, tz_exchange)
# Auto/back adjust
try:
if auto_adjust:
quotes = utils.auto_adjust(quotes)
elif back_adjust:
quotes = utils.back_adjust(quotes)
except Exception as e:
if auto_adjust:
err_msg = "auto_adjust failed with %s" % e
else:
err_msg = "back_adjust failed with %s" % e
shared._DFS[self.ticker] = utils.empty_df()
shared._ERRORS[self.ticker] = err_msg
if debug:
if raise_errors:
raise Exception('%s: %s' % (self.ticker, err_msg))
else:
print('%s: %s' % (self.ticker, err_msg))
if rounding:
quotes = _np.round(quotes, data[
"chart"]["result"][0]["meta"]["priceHint"])
quotes['Volume'] = quotes['Volume'].fillna(0).astype(_np.int64)
# actions
dividends, splits, capital_gains = utils.parse_actions(data["chart"]["result"][0])
if not expect_capital_gains:
capital_gains = None
if start is not None:
# Note: use pandas Timestamp as datetime.utcfromtimestamp has bugs on windows
# https://github.com/python/cpython/issues/81708
startDt = _pd.Timestamp(start, unit='s')
if dividends is not None:
dividends = dividends[dividends.index>=startDt]
if "capitalGains" in params["events"] and capital_gains is not None:
if capital_gains is not None:
capital_gains = capital_gains[capital_gains.index>=startDt]
if splits is not None:
splits = splits[splits.index >= startDt]
@@ -320,7 +305,7 @@ class TickerBase:
endDt = _pd.Timestamp(end, unit='s')
if dividends is not None:
dividends = dividends[dividends.index<endDt]
if "capitalGains" in params["events"] and capital_gains is not None:
if capital_gains is not None:
capital_gains = capital_gains[capital_gains.index<endDt]
if splits is not None:
splits = splits[splits.index < endDt]
@@ -336,11 +321,11 @@ class TickerBase:
if not intraday:
# If localizing a midnight during DST transition hour when clocks roll back,
# meaning clock hits midnight twice, then use the 2nd (ambiguous=True)
quotes.index = _pd.to_datetime(quotes.index.date).tz_localize(tz_exchange, ambiguous=True)
quotes.index = _pd.to_datetime(quotes.index.date).tz_localize(tz_exchange, ambiguous=True, nonexistent='shift_forward')
if dividends.shape[0] > 0:
dividends.index = _pd.to_datetime(dividends.index.date).tz_localize(tz_exchange, ambiguous=True)
dividends.index = _pd.to_datetime(dividends.index.date).tz_localize(tz_exchange, ambiguous=True, nonexistent='shift_forward')
if splits.shape[0] > 0:
splits.index = _pd.to_datetime(splits.index.date).tz_localize(tz_exchange, ambiguous=True)
splits.index = _pd.to_datetime(splits.index.date).tz_localize(tz_exchange, ambiguous=True, nonexistent='shift_forward')
# Combine
df = quotes.sort_index()
@@ -356,7 +341,7 @@ class TickerBase:
df.loc[df["Stock Splits"].isna(), "Stock Splits"] = 0
else:
df["Stock Splits"] = 0.0
if "capitalGains" in params["events"]:
if expect_capital_gains:
if capital_gains.shape[0] > 0:
df = utils.safe_merge_dfs(df, capital_gains, interval)
if "Capital Gains" in df.columns:
@@ -364,6 +349,35 @@ class TickerBase:
else:
df["Capital Gains"] = 0.0
if repair:
# Do this before auto/back adjust
df = self._fix_zeroes(df, interval, tz_exchange)
df = self._fix_unit_mixups(df, interval, tz_exchange)
# Auto/back adjust
try:
if auto_adjust:
df = utils.auto_adjust(df)
elif back_adjust:
df = utils.back_adjust(df)
except Exception as e:
if auto_adjust:
err_msg = "auto_adjust failed with %s" % e
else:
err_msg = "back_adjust failed with %s" % e
shared._DFS[self.ticker] = utils.empty_df()
shared._ERRORS[self.ticker] = err_msg
if debug:
if raise_errors:
raise Exception('%s: %s' % (self.ticker, err_msg))
else:
print('%s: %s' % (self.ticker, err_msg))
if rounding:
df = _np.round(df, data[
"chart"]["result"][0]["meta"]["priceHint"])
df['Volume'] = df['Volume'].fillna(0).astype(_np.int64)
if intraday:
df.index.name = "Datetime"
else:
@@ -382,13 +396,14 @@ class TickerBase:
# ------------------------
def _reconstruct_interval(self, df_row, interval, bad_fields):
if isinstance(df_row, _pd.DataFrame) or not isinstance(df_row, _pd.Series):
raise Exception("'df_row' must be a Pandas Series not", type(df_row))
if not isinstance(bad_fields, (list, set, _np.ndarray)):
raise Exception("'bad_fields' must be a list/set not", type(bad_fields))
def _reconstruct_intervals_batch(self, df, interval, tag=-1):
if not isinstance(df, _pd.DataFrame):
raise Exception("'df' must be a Pandas DataFrame not", type(df))
data_cols = [c for c in ["Open", "High", "Low", "Close", "Adj Close"] if c in df_row.index]
# Reconstruct values in df using finer-grained price data. Delimiter marks what to reconstruct
price_cols = [c for c in ["Open", "High", "Low", "Close", "Adj Close"] if c in df]
data_cols = price_cols + ["Volume"]
# If interval is weekly then can construct with daily. But if smaller intervals then
# restricted to recent times:
@@ -403,45 +418,163 @@ class TickerBase:
# Correct by fetching day of hourly data
sub_interval = "1h"
td_range = _datetime.timedelta(days=1)
elif interval == "1h":
sub_interval = "30m"
td_range = _datetime.timedelta(hours=1)
else:
print("WARNING: Have not implemented repair for '{}' interval. Contact developers".format(interval))
return df_row
raise Exception("why here")
return df
idx = df_row.name
start = idx.date()
if sub_interval == "1h" and (_datetime.date.today() - start) > _datetime.timedelta(days=729):
# Don't bother requesting more price data, Yahoo will reject
return None
df = df.sort_index()
f_repair = df[data_cols].to_numpy()==tag
f_repair_rows = f_repair.any(axis=1)
# Ignore old intervals for which Yahoo won't return finer data:
if sub_interval == "1h":
f_recent = _datetime.date.today() - df.index.date < _datetime.timedelta(days=730)
f_repair_rows = f_repair_rows & f_recent
elif sub_interval in ["30m", "15m"]:
f_recent = _datetime.date.today() - df.index.date < _datetime.timedelta(days=60)
f_repair_rows = f_repair_rows & f_recent
if not f_repair_rows.any():
print("data too old to fix")
return df
dts_to_repair = df.index[f_repair_rows]
indices_to_repair = _np.where(f_repair_rows)[0]
if len(dts_to_repair) == 0:
return df
df_v2 = df.copy()
df_noNa = df[~df[price_cols].isna().any(axis=1)]
# Group nearby NaN-intervals together to reduce number of Yahoo fetches
dts_groups = [[dts_to_repair[0]]]
last_dt = dts_to_repair[0]
last_ind = indices_to_repair[0]
td = utils._interval_to_timedelta(interval)
if interval == "1mo":
grp_td_threshold = _datetime.timedelta(days=28)
elif interval == "1wk":
grp_td_threshold = _datetime.timedelta(days=28)
elif interval == "1d":
grp_td_threshold = _datetime.timedelta(days=14)
elif interval == "1h":
grp_td_threshold = _datetime.timedelta(days=7)
else:
new_vals = {}
if sub_interval == "1h":
df_fine = self.history(start=start, end=start + td_range, interval=sub_interval, auto_adjust=False)
grp_td_threshold = _datetime.timedelta(days=2)
# grp_td_threshold = _datetime.timedelta(days=7)
for i in range(1, len(dts_to_repair)):
ind = indices_to_repair[i]
dt = dts_to_repair[i]
if (dt-dts_groups[-1][-1]) < grp_td_threshold:
dts_groups[-1].append(dt)
elif ind - last_ind <= 3:
dts_groups[-1].append(dt)
else:
df_fine = self.history(start=start - td_range, end=start + td_range, interval=sub_interval,
auto_adjust=False)
dts_groups.append([dt])
last_dt = dt
last_ind = ind
# First, check whether df_fine has different split-adjustment than df_row.
# If it is different, then adjust df_fine to match df_row
good_fields = list(set(data_cols) - set(bad_fields) - set("Adj Close"))
if len(good_fields) == 0:
raise Exception(
"No good fields, so cannot determine whether different split-adjustment. Contact developers")
# median = df_row.loc[good_fields].median()
# median_fine = _np.median(df_fine[good_fields].values)
# ratio = median/median_fine
# Better method to calculate split-adjustment:
df_fine_from_idx = df_fine[df_fine.index >= idx]
ratios = []
for f in good_fields:
if f == "Low":
ratios.append(df_row[f] / df_fine_from_idx[f].min())
elif f == "High":
ratios.append(df_row[f] / df_fine_from_idx[f].max())
elif f == "Open":
ratios.append(df_row[f] / df_fine_from_idx[f].iloc[0])
elif f == "Close":
ratios.append(df_row[f] / df_fine_from_idx[f].iloc[-1])
# Add some good data to each group, so can calibrate later:
for i in range(len(dts_groups)):
g = dts_groups[i]
g0 = g[0]
i0 = df_noNa.index.get_loc(g0)
if i0 > 0:
dts_groups[i].insert(0, df_noNa.index[i0-1])
gl = g[-1]
il = df_noNa.index.get_loc(gl)
if il < len(df_noNa)-1:
dts_groups[i].append(df_noNa.index[il+1])
n_fixed = 0
for g in dts_groups:
df_block = df[df.index.isin(g)]
start_dt = g[0]
start_d = start_dt.date()
if sub_interval == "1h" and (_datetime.date.today() - start_d) > _datetime.timedelta(days=729):
# Don't bother requesting more price data, Yahoo will reject
continue
elif sub_interval in ["30m", "15m"] and (_datetime.date.today() - start_d) > _datetime.timedelta(days=59):
# Don't bother requesting more price data, Yahoo will reject
continue
td_1d = _datetime.timedelta(days=1)
if interval in "1wk":
fetch_start = start_d - td_range # need previous week too
fetch_end = g[-1].date() + td_range
elif interval == "1d":
fetch_start = start_d
fetch_end = g[-1].date() + td_range
else:
fetch_start = g[0]
fetch_end = g[-1] + td_range
prepost = interval == "1d"
df_fine = self.history(start=fetch_start, end=fetch_end, interval=sub_interval, auto_adjust=False, prepost=prepost, repair=False, keepna=True)
if df_fine is None or df_fine.empty:
print("YF: WARNING: Cannot reconstruct because Yahoo not returning data in interval")
continue
df_fine["ctr"] = 0
if interval == "1wk":
# df_fine["Week Start"] = df_fine.index.tz_localize(None).to_period("W-SUN").start_time
weekdays = ["MON", "TUE", "WED", "THU", "FRI", "SAT", "SUN"]
week_end_day = weekdays[(df_block.index[0].weekday()+7-1)%7]
df_fine["Week Start"] = df_fine.index.tz_localize(None).to_period("W-"+week_end_day).start_time
grp_col = "Week Start"
elif interval == "1d":
df_fine["Day Start"] = pd.to_datetime(df_fine.index.date)
grp_col = "Day Start"
else:
df_fine.loc[df_fine.index.isin(df_block.index), "ctr"] = 1
df_fine["intervalID"] = df_fine["ctr"].cumsum()
df_fine = df_fine.drop("ctr", axis=1)
grp_col = "intervalID"
df_fine = df_fine[~df_fine[price_cols].isna().all(axis=1)]
df_new = df_fine.groupby(grp_col).agg(
Open=("Open", "first"),
Close=("Close", "last"),
AdjClose=("Adj Close", "last"),
Low=("Low", "min"),
High=("High", "max"),
Volume=("Volume", "sum")).rename(columns={"AdjClose":"Adj Close"})
if grp_col in ["Week Start", "Day Start"]:
df_new.index = df_new.index.tz_localize(df_fine.index.tz)
else:
df_fine["diff"] = df_fine["intervalID"].diff()
new_index = _np.append([df_fine.index[0]], df_fine.index[df_fine["intervalID"].diff()>0])
df_new.index = new_index
# Calibrate! Check whether 'df_fine' has different split-adjustment.
# If different, then adjust to match 'df'
df_block_calib = df_block[price_cols]
common_index = df_block_calib.index[df_block_calib.index.isin(df_new.index)]
if len(common_index) == 0:
# Can't calibrate so don't attempt repair
continue
df_new_calib = df_new[df_new.index.isin(common_index)][price_cols]
df_block_calib = df_block_calib[df_block_calib.index.isin(common_index)]
calib_filter = (df_block_calib != tag).to_numpy()
if not calib_filter.any():
# Can't calibrate so don't attempt repair
continue
# Avoid divide-by-zero warnings printing:
df_new_calib = df_new_calib.to_numpy()
df_block_calib = df_block_calib.to_numpy()
for j in range(len(price_cols)):
c = price_cols[j]
f = ~calib_filter[:,j]
if f.any():
df_block_calib[f,j] = 1
df_new_calib[f,j] = 1
ratios = (df_block_calib / df_new_calib)[calib_filter]
ratio = _np.mean(ratios)
#
ratio_rcp = round(1.0 / ratio, 1)
@@ -453,36 +586,51 @@ class TickerBase:
if ratio > 1:
# data has different split-adjustment than fine-grained data
# Adjust fine-grained to match
df_fine[data_cols] *= ratio
df_new[price_cols] *= ratio
df_new["Volume"] /= ratio
elif ratio_rcp > 1:
# data has different split-adjustment than fine-grained data
# Adjust fine-grained to match
df_fine[data_cols] *= 1.0 / ratio_rcp
df_new[price_cols] *= 1.0 / ratio_rcp
df_new["Volume"] *= ratio_rcp
if sub_interval != "1h":
df_last_week = df_fine[df_fine.index < idx]
df_fine = df_fine[df_fine.index >= idx]
# Repair!
bad_dts = df_block.index[(df_block[price_cols]==tag).any(axis=1)]
if "High" in bad_fields:
new_vals["High"] = df_fine["High"].max()
if "Low" in bad_fields:
new_vals["Low"] = df_fine["Low"].min()
if "Open" in bad_fields:
if sub_interval != "1h" and idx != df_fine.index[0]:
# Exchange closed Monday. In this case, Yahoo sets Open to last week close
new_vals["Open"] = df_last_week["Close"][-1]
if "Low" in new_vals:
new_vals["Low"] = min(new_vals["Open"], new_vals["Low"])
elif new_vals["Open"] < df_row["Low"]:
new_vals["Low"] = new_vals["Open"]
else:
new_vals["Open"] = df_fine["Open"].iloc[0]
if "Close" in bad_fields:
new_vals["Close"] = df_fine["Close"].iloc[-1]
# Assume 'Adj Close' also corrupted, easier than detecting whether true
new_vals["Adj Close"] = df_fine["Adj Close"].iloc[-1]
for idx in bad_dts:
if not idx in df_new.index:
# Yahoo didn't return finer-grain data for this interval,
# so probably no trading happened.
# print("no fine data")
continue
df_new_row = df_new.loc[idx]
return new_vals
if interval == "1wk":
df_last_week = df_new.iloc[df_new.index.get_loc(idx)-1]
df_fine = df_fine.loc[idx:]
df_bad_row = df.loc[idx]
bad_fields = df_bad_row.index[df_bad_row==tag].values
if "High" in bad_fields:
df_v2.loc[idx, "High"] = df_new_row["High"]
if "Low" in bad_fields:
df_v2.loc[idx, "Low"] = df_new_row["Low"]
if "Open" in bad_fields:
if interval == "1wk" and idx != df_fine.index[0]:
# Exchange closed Monday. In this case, Yahoo sets Open to last week close
df_v2.loc[idx, "Open"] = df_last_week["Close"]
df_v2.loc[idx, "Low"] = min(df_v2.loc[idx, "Open"], df_v2.loc[idx, "Low"])
else:
df_v2.loc[idx, "Open"] = df_new_row["Open"]
if "Close" in bad_fields:
df_v2.loc[idx, "Close"] = df_new_row["Close"]
# Assume 'Adj Close' also corrupted, easier than detecting whether true
df_v2.loc[idx, "Adj Close"] = df_new_row["Adj Close"]
if "Volume" in bad_fields:
df_v2.loc[idx, "Volume"] = df_new_row["Volume"]
n_fixed += 1
return df_v2
def _fix_unit_mixups(self, df, interval, tz_exchange):
# Sometimes Yahoo returns few prices in cents/pence instead of $/£
@@ -508,71 +656,93 @@ class TickerBase:
data_cols = ["High", "Open", "Low", "Close"] # Order important, separate High from Low
data_cols = [c for c in data_cols if c in df2.columns]
f_zeroes = (df2[data_cols]==0).any(axis=1)
if f_zeroes.any():
df2_zeroes = df2[f_zeroes]
df2 = df2[~f_zeroes]
else:
df2_zeroes = None
if df2.shape[0] <= 1:
return df
median = _ndimage.median_filter(df2[data_cols].values, size=(3, 3), mode="wrap")
if (median == 0).any():
raise Exception("median contains zeroes, why?")
ratio = df2[data_cols].values / median
ratio_rounded = (ratio / 20).round() * 20 # round ratio to nearest 20
f = ratio_rounded == 100
if not f.any():
return df
# Store each mixup:
mixups = {}
for j in range(len(data_cols)):
fj = f[:, j]
if fj.any():
dc = data_cols[j]
for i in _np.where(fj)[0]:
idx = df2.index[i]
if idx not in mixups:
mixups[idx] = {"data": df2.loc[idx, data_cols], "fields": {dc}}
else:
mixups[idx]["fields"].add(dc)
n_mixups = len(mixups)
# Mark values to send for repair
tag = -1.0
for i in range(len(data_cols)):
fi = f[:,i]
c = data_cols[i]
df2.loc[fi, c] = tag
if len(mixups) > 0:
# This first pass will correct all errors in Open/Close/AdjClose columns.
# It will also attempt to correct Low/High columns, but only if can get price data.
for idx in sorted(list(mixups.keys())):
m = mixups[idx]
new_values = self._reconstruct_interval(df2.loc[idx], interval, m["fields"])
if not new_values is None:
for k in new_values:
df2.loc[idx, k] = new_values[k]
del mixups[idx]
n_before = (df2[data_cols].to_numpy()==tag).sum()
df2 = self._reconstruct_intervals_batch(df2, interval, tag=tag)
n_after = (df2[data_cols].to_numpy()==tag).sum()
if n_after > 0:
# This second pass will *crudely* "fix" any remaining errors in High/Low
# simply by ensuring they don't contradict e.g. Low = 100x High
if len(mixups) > 0:
for idx in sorted(list(mixups.keys())):
m = mixups[idx]
row = df2.loc[idx, ["Open", "Close"]]
if "High" in m["fields"]:
df2.loc[idx, "High"] = row.max()
m["fields"].remove("High")
if "Low" in m["fields"]:
df2.loc[idx, "Low"] = row.min()
m["fields"].remove("Low")
# simply by ensuring they don't contradict e.g. Low = 100x High.
f = df2[data_cols].to_numpy()==tag
for i in range(f.shape[0]):
fi = f[i,:]
if not fi.any():
continue
idx = df2.index[i]
if len(m["fields"]) == 0:
del mixups[idx]
c = "Open"
j = data_cols.index(c)
if fi[j]:
df2.loc[idx, c] = df.loc[idx, c] * 0.01
#
c = "Close"
j = data_cols.index(c)
if fi[j]:
df2.loc[idx, c] = df.loc[idx, c] * 0.01
#
c = "High"
j = data_cols.index(c)
if fi[j]:
df2.loc[idx, c] = df2.loc[idx, ["Open", "Close"]].max()
#
c = "Low"
j = data_cols.index(c)
if fi[j]:
df2.loc[idx, c] = df2.loc[idx, ["Open", "Close"]].min()
n_fixed = n_mixups - len(mixups)
print("{}: fixed {} currency unit mixups in {} price data".format(self.ticker, n_fixed, interval))
if len(mixups) > 0:
print(" ... and failed to correct {}".format(len(mixups)))
n_after_crude = (df2[data_cols].to_numpy()==tag).sum()
n_fixed = n_before - n_after_crude
n_fixed_crudely = n_after - n_after_crude
if n_fixed > 0:
report_msg = f"{self.ticker}: fixed {n_fixed}/{n_before} currency unit mixups "
if n_fixed_crudely > 0:
report_msg += f"({n_fixed_crudely} crudely) "
report_msg += f"in {interval} price data"
print(report_msg)
# Restore original values where repair failed
f = df2[data_cols].values==tag
for j in range(len(data_cols)):
fj = f[:,j]
if fj.any():
c = data_cols[j]
df2.loc[fj, c] = df.loc[fj, c]
if df2_zeroes is not None:
df2 = _pd.concat([df2, df2_zeroes]).sort_index()
df2.index = _pd.to_datetime()
return df2
def _fix_zero_prices(self, df, interval, tz_exchange):
# Sometimes Yahoo returns prices=0 when obviously wrong e.g. Volume>0 and Close>0.
# Easy to detect and fix
def _fix_zeroes(self, df, interval, tz_exchange):
# Sometimes Yahoo returns prices=0 or NaN when trades occurred.
# But most times when prices=0 or NaN returned is because no trades.
# Impossible to distinguish, so only attempt repair if few or rare.
if df.shape[0] == 0:
return df
if df.shape[0] == 1:
# Need multiple rows to confidently identify outliers
return df
df2 = df.copy()
@@ -581,23 +751,45 @@ class TickerBase:
else:
df2.index = df2.index.tz_convert(tz_exchange)
data_cols = ["Open", "High", "Low", "Close"]
data_cols = [c for c in data_cols if c in df2.columns]
f_zeroes = (df2[data_cols] == 0.0).values.any(axis=1)
price_cols = [c for c in ["Open", "High", "Low", "Close", "Adj Close"] if c in df2.columns]
f_zero_or_nan = (df2[price_cols] == 0.0).values | df2[price_cols].isna().values
# Check whether worth attempting repair
if f_zero_or_nan.any(axis=1).sum() == 0:
return df
if f_zero_or_nan.sum() == len(price_cols)*len(df2):
# Need some good data to calibrate
return df
# - avoid repair if many zeroes/NaNs
pct_zero_or_nan = f_zero_or_nan.sum() / (len(price_cols)*len(df2))
if f_zero_or_nan.any(axis=1).sum()>2 and pct_zero_or_nan > 0.05:
return df
n_fixed = 0
for i in _np.where(f_zeroes)[0]:
idx = df2.index[i]
df_row = df2.loc[idx]
bad_fields = df2.columns[df_row.values == 0.0].values
new_values = self._reconstruct_interval(df2.loc[idx], interval, bad_fields)
if not new_values is None:
for k in new_values:
df2.loc[idx, k] = new_values[k]
n_fixed += 1
data_cols = price_cols + ["Volume"]
# Mark values to send for repair
tag = -1.0
for i in range(len(price_cols)):
c = price_cols[i]
df2.loc[f_zero_or_nan[:,i], c] = tag
# If volume=0 or NaN for bad prices, then tag volume for repair
df2.loc[f_zero_or_nan.any(axis=1) & (df2["Volume"]==0), "Volume"] = tag
df2.loc[f_zero_or_nan.any(axis=1) & (df2["Volume"].isna()), "Volume"] = tag
n_before = (df2[data_cols].to_numpy()==tag).sum()
df2 = self._reconstruct_intervals_batch(df2, interval, tag=tag)
n_after = (df2[data_cols].to_numpy()==tag).sum()
n_fixed = n_before - n_after
if n_fixed > 0:
print("{}: fixed {} price=0.0 errors in {} price data".format(self.ticker, n_fixed, interval))
# Restore original values where repair failed (i.e. remove tag values)
f = df2[data_cols].values==tag
for j in range(len(data_cols)):
fj = f[:,j]
if fj.any():
c = data_cols[j]
df2.loc[fj, c] = df.loc[fj, c]
return df2
def _get_ticker_tz(self, debug_mode, proxy, timeout):
@@ -632,7 +824,7 @@ class TickerBase:
url = "{}/v8/finance/chart/{}".format(self._base_url, self.ticker)
try:
data = self._data.get(url=url, params=params, proxy=proxy, timeout=timeout)
data = self._data.cache_get(url=url, params=params, proxy=proxy, timeout=timeout)
data = data.json()
except Exception as e:
if debug_mode:
@@ -698,6 +890,16 @@ class TickerBase:
data = self._quote.info
return data
def get_stats(self, proxy=None) -> dict:
self._stats.proxy = proxy
data = self._stats.stats
return data
def get_valuations(self, proxy=None) -> dict:
self._stats.proxy = proxy
data = self._stats.valuations
return data
def get_sustainability(self, proxy=None, as_dict=False):
self._quote.proxy = proxy
data = self._quote.sustainability
@@ -748,6 +950,18 @@ class TickerBase:
return data
def get_earnings(self, proxy=None, as_dict=False, freq="yearly"):
"""
:Parameters:
as_dict: bool
Return table as Python dict
Default is False
freq: str
"yearly" or "quarterly"
Default is "yearly"
proxy: str
Optional. Proxy server URL scheme
Default is None
"""
self._fundamentals.proxy = proxy
data = self._fundamentals.earnings[freq]
if as_dict:
@@ -757,27 +971,117 @@ class TickerBase:
return dict_data
return data
def get_income_stmt(self, proxy=None, as_dict=False, freq="yearly"):
def get_income_stmt(self, proxy=None, as_dict=False, pretty=False, freq="yearly", legacy=False):
"""
:Parameters:
as_dict: bool
Return table as Python dict
Default is False
pretty: bool
Format row names nicely for readability
Default is False
freq: str
"yearly" or "quarterly"
Default is "yearly"
legacy: bool
Return old financials tables. Useful for when new tables not available
Default is False
proxy: str
Optional. Proxy server URL scheme
Default is None
"""
self._fundamentals.proxy = proxy
data = self._fundamentals.financials.get_income(freq=freq, proxy=proxy)
if legacy:
data = self._fundamentals.financials.get_income_scrape(freq=freq, proxy=proxy)
else:
data = self._fundamentals.financials.get_income_time_series(freq=freq, proxy=proxy)
if pretty:
data = data.copy()
data.index = utils.camel2title(data.index, sep=' ', acronyms=["EBIT", "EBITDA", "EPS", "NI"])
if as_dict:
return data.to_dict()
return data
def get_balance_sheet(self, proxy=None, as_dict=False, freq="yearly"):
def get_incomestmt(self, proxy=None, as_dict=False, pretty=False, freq="yearly", legacy=False):
return self.get_income_stmt(proxy, as_dict, pretty, freq, legacy)
def get_financials(self, proxy=None, as_dict=False, pretty=False, freq="yearly", legacy=False):
return self.get_income_stmt(proxy, as_dict, pretty, freq, legacy)
def get_balance_sheet(self, proxy=None, as_dict=False, pretty=False, freq="yearly", legacy=False):
"""
:Parameters:
as_dict: bool
Return table as Python dict
Default is False
pretty: bool
Format row names nicely for readability
Default is False
freq: str
"yearly" or "quarterly"
Default is "yearly"
legacy: bool
Return old financials tables. Useful for when new tables not available
Default is False
proxy: str
Optional. Proxy server URL scheme
Default is None
"""
self._fundamentals.proxy = proxy
data = self._fundamentals.financials.get_balance_sheet(freq=freq, proxy=proxy)
if legacy:
data = self._fundamentals.financials.get_balance_sheet_scrape(freq=freq, proxy=proxy)
else:
data = self._fundamentals.financials.get_balance_sheet_time_series(freq=freq, proxy=proxy)
if pretty:
data = data.copy()
data.index = utils.camel2title(data.index, sep=' ', acronyms=["PPE"])
if as_dict:
return data.to_dict()
return data
def get_cashflow(self, proxy=None, as_dict=False, freq="yearly"):
def get_balancesheet(self, proxy=None, as_dict=False, pretty=False, freq="yearly", legacy=False):
return self.get_balance_sheet(proxy, as_dict, pretty, freq, legacy)
def get_cash_flow(self, proxy=None, as_dict=False, pretty=False, freq="yearly", legacy=False):
"""
:Parameters:
as_dict: bool
Return table as Python dict
Default is False
pretty: bool
Format row names nicely for readability
Default is False
freq: str
"yearly" or "quarterly"
Default is "yearly"
legacy: bool
Return old financials tables. Useful for when new tables not available
Default is False
proxy: str
Optional. Proxy server URL scheme
Default is None
"""
self._fundamentals.proxy = proxy
data = self._fundamentals.financials.get_cash_flow(freq=freq, proxy=proxy)
if legacy:
data = self._fundamentals.financials.get_cash_flow_scrape(freq=freq, proxy=proxy)
else:
data = self._fundamentals.financials.get_cash_flow_time_series(freq=freq, proxy=proxy)
if pretty:
data = data.copy()
data.index = utils.camel2title(data.index, sep=' ', acronyms=["PPE"])
if as_dict:
return data.to_dict()
return data
def get_cashflow(self, proxy=None, as_dict=False, pretty=False, freq="yearly", legacy=False):
return self.get_cash_flow(proxy, as_dict, pretty, freq, legacy)
def get_dividends(self, proxy=None):
if self._history is None:
self.history(period="max", proxy=proxy)
@@ -820,6 +1124,59 @@ class TickerBase:
return data.to_dict()
return data
def get_shares_full(self, start=None, end=None, proxy=None):
# Process dates
tz = self._get_ticker_tz(debug_mode=False, proxy=None, timeout=10)
dt_now = _pd.Timestamp.utcnow().tz_convert(tz)
if start is not None:
start_ts = utils._parse_user_dt(start, tz)
start = _pd.Timestamp.fromtimestamp(start_ts).tz_localize("UTC").tz_convert(tz)
start_d = start.date()
if end is not None:
end_ts = utils._parse_user_dt(end, tz)
end = _pd.Timestamp.fromtimestamp(end_ts).tz_localize("UTC").tz_convert(tz)
end_d = end.date()
if end is None:
end = dt_now
if start is None:
start = end - _pd.Timedelta(days=548) # 18 months
if start >= end:
print("ERROR: start date must be before end")
return None
start = start.floor("D")
end = end.ceil("D")
# Fetch
ts_url_base = "https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{0}?symbol={0}".format(self.ticker)
shares_url = ts_url_base + "&period1={}&period2={}".format(int(start.timestamp()), int(end.timestamp()))
try:
json_str = self._data.cache_get(shares_url).text
json_data = _json.loads(json_str)
except:
print(f"{self.ticker}: Yahoo web request for share count failed")
return None
try:
fail = json_data["finance"]["error"]["code"] == "Bad Request"
except:
fail = False
if fail:
print(f"{self.ticker}: Yahoo web request for share count failed")
return None
shares_data = json_data["timeseries"]["result"]
if not "shares_out" in shares_data[0]:
print(f"{self.ticker}: Yahoo did not return share count in date range {start} -> {end}")
return None
try:
df = _pd.Series(shares_data[0]["shares_out"], index=_pd.to_datetime(shares_data[0]["timestamp"], unit="s"))
except Exception as e:
print(f"{self.ticker}: Failed to parse shares count data: "+str(e))
return None
df.index = df.index.tz_localize(tz)
df = df.sort_index()
return df
def get_isin(self, proxy=None) -> Optional[str]:
# *** experimental ***
if self._isin is not None:
@@ -843,7 +1200,7 @@ class TickerBase:
url = 'https://markets.businessinsider.com/ajax/' \
'SearchController_Suggest?max_results=25&query=%s' \
% urlencode(q)
data = self._data.get(url=url, proxy=proxy).text
data = self._data.cache_get(url=url, proxy=proxy).text
search_str = '"{}|'.format(ticker)
if search_str not in data:
@@ -865,7 +1222,7 @@ class TickerBase:
# Getting data from json
url = "{}/v1/finance/search?q={}".format(self._base_url, self.ticker)
data = self._data.get(url=url, proxy=proxy)
data = self._data.cache_get(url=url, proxy=proxy)
if "Will be right back" in data.text:
raise RuntimeError("*** YAHOO! FINANCE IS CURRENTLY DOWN! ***\n"
"Our engineers are working quickly to resolve "
@@ -896,7 +1253,7 @@ class TickerBase:
url = "{}/calendar/earnings?symbol={}&offset={}&size={}".format(
_ROOT_URL_, self.ticker, page_offset, page_size)
data = self._data.get(url=url, proxy=proxy).text
data = self._data.cache_get(url=url, proxy=proxy).text
if "Will be right back" in data:
raise RuntimeError("*** YAHOO! FINANCE IS CURRENTLY DOWN! ***\n"
@@ -956,11 +1313,17 @@ class TickerBase:
dates[cn] = _pd.to_datetime(dates[cn], format="%b %d, %Y, %I %p")
# - instead of attempting decoding of ambiguous timezone abbreviation, just use 'info':
self._quote.proxy = proxy
dates[cn] = dates[cn].dt.tz_localize(
tz=self._quote.info["exchangeTimezoneName"])
tz = self._get_ticker_tz(debug_mode=False, proxy=proxy, timeout=30)
dates[cn] = dates[cn].dt.tz_localize(tz)
dates = dates.set_index("Earnings Date")
self._earnings_dates[limit] = dates
return dates
def get_history_metadata(self) -> dict:
if self._history_metadata is None:
raise RuntimeError("Metadata was never retrieved so far, "
"call history() to retrieve it")
return self._history_metadata

View File

@@ -1,6 +1,17 @@
import functools
from functools import lru_cache
import hashlib
from base64 import b64decode
usePycryptodome = False # slightly faster
# usePycryptodome = True
if usePycryptodome:
from Crypto.Cipher import AES
from Crypto.Util.Padding import unpad
else:
from cryptography.hazmat.primitives import padding
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
import requests as requests
import re
@@ -35,6 +46,99 @@ def lru_cache_freezeargs(func):
return wrapped
def decrypt_cryptojs_aes_stores(data):
encrypted_stores = data['context']['dispatcher']['stores']
if "_cs" in data and "_cr" in data:
_cs = data["_cs"]
_cr = data["_cr"]
_cr = b"".join(int.to_bytes(i, length=4, byteorder="big", signed=True) for i in json.loads(_cr)["words"])
password = hashlib.pbkdf2_hmac("sha1", _cs.encode("utf8"), _cr, 1, dklen=32).hex()
else:
# Currently assume one extra key in dict, which is password. Print error if
# more extra keys detected.
new_keys = [k for k in data.keys() if k not in ["context", "plugins"]]
l = len(new_keys)
if l == 0:
return None
elif l == 1 and isinstance(data[new_keys[0]], str):
password_key = new_keys[0]
else:
msg = "Yahoo has again changed data format, yfinance now unsure which key(s) is for decryption:"
k = new_keys[0]
k_str = k if len(k) < 32 else k[:32-3]+"..."
msg += f" '{k_str}'->{type(data[k])}"
for i in range(1, len(new_keys)):
msg += f" , '{k_str}'->{type(data[k])}"
raise Exception(msg)
password_key = new_keys[0]
password = data[password_key]
encrypted_stores = b64decode(encrypted_stores)
assert encrypted_stores[0:8] == b"Salted__"
salt = encrypted_stores[8:16]
encrypted_stores = encrypted_stores[16:]
def EVPKDF(password, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="md5") -> tuple:
"""OpenSSL EVP Key Derivation Function
Args:
password (Union[str, bytes, bytearray]): Password to generate key from.
salt (Union[bytes, bytearray]): Salt to use.
keySize (int, optional): Output key length in bytes. Defaults to 32.
ivSize (int, optional): Output Initialization Vector (IV) length in bytes. Defaults to 16.
iterations (int, optional): Number of iterations to perform. Defaults to 1.
hashAlgorithm (str, optional): Hash algorithm to use for the KDF. Defaults to 'md5'.
Returns:
key, iv: Derived key and Initialization Vector (IV) bytes.
Taken from: https://gist.github.com/rafiibrahim8/0cd0f8c46896cafef6486cb1a50a16d3
OpenSSL original code: https://github.com/openssl/openssl/blob/master/crypto/evp/evp_key.c#L78
"""
assert iterations > 0, "Iterations can not be less than 1."
if isinstance(password, str):
password = password.encode("utf-8")
final_length = keySize + ivSize
key_iv = b""
block = None
while len(key_iv) < final_length:
hasher = hashlib.new(hashAlgorithm)
if block:
hasher.update(block)
hasher.update(password)
hasher.update(salt)
block = hasher.digest()
for _ in range(1, iterations):
block = hashlib.new(hashAlgorithm, block).digest()
key_iv += block
key, iv = key_iv[:keySize], key_iv[keySize:final_length]
return key, iv
try:
key, iv = EVPKDF(password, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="md5")
except:
raise Exception("yfinance failed to decrypt Yahoo data response")
if usePycryptodome:
cipher = AES.new(key, AES.MODE_CBC, iv=iv)
plaintext = cipher.decrypt(encrypted_stores)
plaintext = unpad(plaintext, 16, style="pkcs7")
else:
cipher = Cipher(algorithms.AES(key), modes.CBC(iv))
decryptor = cipher.decryptor()
plaintext = decryptor.update(encrypted_stores) + decryptor.finalize()
unpadder = padding.PKCS7(128).unpadder()
plaintext = unpadder.update(plaintext) + unpadder.finalize()
plaintext = plaintext.decode("utf-8")
decoded_stores = json.loads(plaintext)
return decoded_stores
_SCRAPE_URL_ = 'https://finance.yahoo.com/quote'
@@ -49,8 +153,6 @@ class TickerData:
self.ticker = ticker
self._session = session or requests
@lru_cache_freezeargs
@lru_cache(maxsize=cache_maxsize)
def get(self, url, user_agent_headers=None, params=None, proxy=None, timeout=30):
proxy = self._get_proxy(proxy)
response = self._session.get(
@@ -61,6 +163,11 @@ class TickerData:
headers=user_agent_headers or self.user_agent_headers)
return response
@lru_cache_freezeargs
@lru_cache(maxsize=cache_maxsize)
def cache_get(self, url, user_agent_headers=None, params=None, proxy=None, timeout=30):
return self.get(url, user_agent_headers, params, proxy, timeout)
def _get_proxy(self, proxy):
# setup proxy in requests format
if proxy is not None:
@@ -83,12 +190,25 @@ class TickerData:
html = self.get(url=ticker_url, proxy=proxy).text
# The actual json-data for stores is in a javascript assignment in the webpage
json_str = html.split('root.App.main =')[1].split(
'(this)')[0].split(';\n}')[0].strip()
data = json.loads(json_str)['context']['dispatcher']['stores']
try:
json_str = html.split('root.App.main =')[1].split(
'(this)')[0].split(';\n}')[0].strip()
except IndexError:
# Fetch failed, probably because Yahoo spam triggered
return {}
data = json.loads(json_str)
stores = decrypt_cryptojs_aes_stores(data)
if stores is None:
# Maybe Yahoo returned old format, not encrypted
if "context" in data and "dispatcher" in data["context"]:
stores = data['context']['dispatcher']['stores']
if stores is None:
raise Exception(f"{self.ticker}: Failed to extract data stores from web request")
# return data
new_data = json.dumps(data).replace('{}', 'null')
new_data = json.dumps(stores).replace('{}', 'null')
new_data = re.sub(
r'{[\'|\"]raw[\'|\"]:(.*?),(.*?)}', r'\1', new_data)

View File

@@ -1,6 +1,6 @@
class YFianceException(Exception):
class YFinanceException(Exception):
pass
class YFianceDataException(YFianceException):
class YFinanceDataException(YFinanceException):
pass

View File

@@ -29,7 +29,7 @@ from . import Ticker, utils
from . import shared
def download(tickers, start=None, end=None, actions=False, threads=True, ignore_tz=True,
def download(tickers, start=None, end=None, actions=False, threads=True, ignore_tz=False,
group_by='column', auto_adjust=False, back_adjust=False, repair=False, keepna=False,
progress=True, period="max", show_errors=True, interval="1d", prepost=False,
proxy=None, rounding=False, timeout=10):
@@ -68,7 +68,7 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
How many threads to use for mass downloading. Default is True
ignore_tz: bool
When combining from different timezones, ignore that part of datetime.
Default is True
Default is False
proxy: str
Optional. Proxy server URL scheme. Default is None
rounding: bool

View File

@@ -2,10 +2,11 @@ import datetime
import json
import pandas as pd
import numpy as np
from yfinance import utils
from yfinance.data import TickerData
from yfinance.exceptions import YFianceDataException, YFianceException
from yfinance.exceptions import YFinanceDataException, YFinanceException
class Fundamentals:
@@ -21,10 +22,10 @@ class Fundamentals:
self._financials_data = None
self._fin_data_quote = None
self._basics_already_scraped = False
self._financials = Fiancials(data)
self._financials = Financials(data)
@property
def financials(self) -> "Fiancials":
def financials(self) -> "Financials":
return self._financials
@property
@@ -96,32 +97,39 @@ class Fundamentals:
pass
class Fiancials:
class Financials:
def __init__(self, data: TickerData):
self._data = data
self._income = {}
self._balance_sheet = {}
self._cash_flow = {}
self._income_time_series = {}
self._balance_sheet_time_series = {}
self._cash_flow_time_series = {}
self._income_scraped = {}
self._balance_sheet_scraped = {}
self._cash_flow_scraped = {}
def get_income(self, freq="yearly", proxy=None) -> pd.DataFrame:
res = self._income
def get_income_time_series(self, freq="yearly", proxy=None) -> pd.DataFrame:
res = self._income_time_series
if freq not in res:
res[freq] = self._scrape("income", freq, proxy=None)
res[freq] = self._fetch_time_series("income", freq, proxy=None)
return res[freq]
def get_balance_sheet(self, freq="yearly", proxy=None) -> pd.DataFrame:
res = self._balance_sheet
def get_balance_sheet_time_series(self, freq="yearly", proxy=None) -> pd.DataFrame:
res = self._balance_sheet_time_series
if freq not in res:
res[freq] = self._scrape("balance-sheet", freq, proxy=None)
res[freq] = self._fetch_time_series("balance-sheet", freq, proxy=None)
return res[freq]
def get_cash_flow(self, freq="yearly", proxy=None) -> pd.DataFrame:
res = self._cash_flow
def get_cash_flow_time_series(self, freq="yearly", proxy=None) -> pd.DataFrame:
res = self._cash_flow_time_series
if freq not in res:
res[freq] = self._scrape("cash-flow", freq, proxy=None)
res[freq] = self._fetch_time_series("cash-flow", freq, proxy=None)
return res[freq]
def _scrape(self, name, timescale, proxy=None):
def _fetch_time_series(self, name, timescale, proxy=None):
# Fetching time series preferred over scraping 'QuoteSummaryStore',
# because it matches what Yahoo shows. But for some tickers returns nothing,
# despite 'QuoteSummaryStore' containing valid data.
allowed_names = ["income", "balance-sheet", "cash-flow"]
allowed_timescales = ["yearly", "quarterly"]
@@ -132,10 +140,11 @@ class Fiancials:
try:
statement = self._create_financials_table(name, timescale, proxy)
if statement is not None:
return statement
except YFianceException as e:
print("Failed to create financials table for {} reason: {}".format(name, repr(e)))
except YFinanceException as e:
print(f"- {self._data.ticker}: Failed to create {name} financials table for reason: {repr(e)}")
return pd.DataFrame()
def _create_financials_table(self, name, timescale, proxy):
@@ -144,14 +153,8 @@ class Fiancials:
name = "financials"
keys = self._get_datastore_keys(name, proxy)
try:
# Developers note: TTM and template stuff allows for reproducing the nested structure
# visible on Yahoo website. But more work needed to make it user-friendly! Ideally
# return a tree data structure instead of Pandas MultiIndex
# So until this is implemented, just return simple tables
return self.get_financials_time_series(timescale, keys, proxy)
except Exception as e:
pass
@@ -174,10 +177,10 @@ class Fiancials:
try:
keys = _finditem1("key", data_stores['FinancialTemplateStore'])
except KeyError as e:
raise YFianceDataException("Parsing FinancialTemplateStore failed, reason: {}".format(repr(e)))
raise YFinanceDataException("Parsing FinancialTemplateStore failed, reason: {}".format(repr(e)))
if not keys:
raise YFianceDataException("No keys in FinancialTemplateStore")
raise YFinanceDataException("No keys in FinancialTemplateStore")
return keys
def get_financials_time_series(self, timescale, keys: list, proxy=None) -> pd.DataFrame:
@@ -192,11 +195,11 @@ class Fiancials:
url = ts_url_base + "&type=" + ",".join([timescale + k for k in keys])
# Yahoo returns maximum 4 years or 5 quarters, regardless of start_dt:
start_dt = datetime.datetime(2016, 12, 31)
end = (datetime.datetime.now() + datetime.timedelta(days=366))
end = pd.Timestamp.utcnow().ceil("D")
url += "&period1={}&period2={}".format(int(start_dt.timestamp()), int(end.timestamp()))
# Step 3: fetch and reshape data
json_str = self._data.get(url=url, proxy=proxy).text
json_str = self._data.cache_get(url=url, proxy=proxy).text
json_data = json.loads(json_str)
data_raw = json_data["timeseries"]["result"]
# data_raw = [v for v in data_raw if len(v) > 1] # Discard keys with no data
@@ -228,3 +231,89 @@ class Fiancials:
df = df[sorted(df.columns, reverse=True)]
return df
def get_income_scrape(self, freq="yearly", proxy=None) -> pd.DataFrame:
res = self._income_scraped
if freq not in res:
res[freq] = self._scrape("income", freq, proxy=None)
return res[freq]
def get_balance_sheet_scrape(self, freq="yearly", proxy=None) -> pd.DataFrame:
res = self._balance_sheet_scraped
if freq not in res:
res[freq] = self._scrape("balance-sheet", freq, proxy=None)
return res[freq]
def get_cash_flow_scrape(self, freq="yearly", proxy=None) -> pd.DataFrame:
res = self._cash_flow_scraped
if freq not in res:
res[freq] = self._scrape("cash-flow", freq, proxy=None)
return res[freq]
def _scrape(self, name, timescale, proxy=None):
# Backup in case _fetch_time_series() fails to return data
allowed_names = ["income", "balance-sheet", "cash-flow"]
allowed_timescales = ["yearly", "quarterly"]
if name not in allowed_names:
raise ValueError("Illegal argument: name must be one of: {}".format(allowed_names))
if timescale not in allowed_timescales:
raise ValueError("Illegal argument: timescale must be one of: {}".format(allowed_names))
try:
statement = self._create_financials_table_old(name, timescale, proxy)
if statement is not None:
return statement
except YFinanceException as e:
print(f"- {self._data.ticker}: Failed to create financials table for {name} reason: {repr(e)}")
return pd.DataFrame()
def _create_financials_table_old(self, name, timescale, proxy):
data_stores = self._data.get_json_data_stores("financials", proxy)
# Fetch raw data
if not "QuoteSummaryStore" in data_stores:
raise YFinanceDataException(f"Yahoo not returning legacy financials data")
data = data_stores["QuoteSummaryStore"]
if name == "cash-flow":
key1 = "cashflowStatement"
key2 = "cashflowStatements"
elif name == "balance-sheet":
key1 = "balanceSheet"
key2 = "balanceSheetStatements"
else:
key1 = "incomeStatement"
key2 = "incomeStatementHistory"
key1 += "History"
if timescale == "quarterly":
key1 += "Quarterly"
if key1 not in data or data[key1] is None or key2 not in data[key1]:
raise YFinanceDataException(f"Yahoo not returning legacy {name} financials data")
data = data[key1][key2]
# Tabulate
df = pd.DataFrame(data)
if len(df) == 0:
raise YFinanceDataException(f"Yahoo not returning legacy {name} financials data")
df = df.drop(columns=['maxAge'])
for col in df.columns:
df[col] = df[col].replace('-', np.nan)
df.set_index('endDate', inplace=True)
try:
df.index = pd.to_datetime(df.index, unit='s')
except ValueError:
df.index = pd.to_datetime(df.index)
df = df.T
df.columns.name = ''
df.index.name = 'Breakdown'
# rename incorrect yahoo key
df.rename(index={'treasuryStock': 'gainsLossesNotAffectingRetainedEarnings'}, inplace=True)
# Upper-case first letter, leave rest unchanged:
s0 = df.index[0]
df.index = [s[0].upper()+s[1:] for s in df.index]
return df

View File

@@ -34,7 +34,7 @@ class Holders:
def _scrape(self, proxy):
ticker_url = "{}/{}".format(self._SCRAPE_URL_, self._data.ticker)
try:
resp = self._data.get(ticker_url + '/holders', proxy)
resp = self._data.cache_get(ticker_url + '/holders', proxy)
holders = pd.read_html(resp.text)
except Exception:
holders = []

View File

@@ -19,13 +19,11 @@ class Quote:
self._calendar = None
self._already_scraped = False
self._already_scraped_complementary = False
@property
def info(self) -> dict:
if self._info is None:
self._scrape(self.proxy)
self._scrape_complementary(self.proxy)
return self._info
@@ -154,57 +152,3 @@ class Quote:
'Firm', 'To Grade', 'From Grade', 'Action']].sort_index()
except Exception:
pass
def _scrape_complementary(self, proxy):
if self._already_scraped_complementary:
return
self._already_scraped_complementary = True
self._scrape(proxy)
if self._info is None:
return
# Complementary key-statistics. For now just want 'trailing PEG ratio'
keys = {"trailingPegRatio"}
if keys:
# Simplified the original scrape code for key-statistics. Very expensive for fetching
# just one value, best if scraping most/all:
#
# p = _re.compile(r'root\.App\.main = (.*);')
# url = 'https://finance.yahoo.com/quote/{}/key-statistics?p={}'.format(self._ticker.ticker, self._ticker.ticker)
# try:
# r = session.get(url, headers=utils.user_agent_headers)
# data = _json.loads(p.findall(r.text)[0])
# key_stats = data['context']['dispatcher']['stores']['QuoteTimeSeriesStore']["timeSeries"]
# for k in keys:
# if k not in key_stats or len(key_stats[k])==0:
# # Yahoo website prints N/A, indicates Yahoo lacks necessary data to calculate
# v = None
# else:
# # Select most recent (last) raw value in list:
# v = key_stats[k][-1]["reportedValue"]["raw"]
# self._info[k] = v
# except Exception:
# raise
# pass
#
# For just one/few variable is faster to query directly:
url = "https://query1.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{}?symbol={}".format(
self._data.ticker, self._data.ticker)
for k in keys:
url += "&type=" + k
# Request 6 months of data
url += "&period1={}".format(
int((datetime.datetime.now() - datetime.timedelta(days=365 // 2)).timestamp()))
url += "&period2={}".format(int((datetime.datetime.now() + datetime.timedelta(days=1)).timestamp()))
json_str = self._data.get(url=url, proxy=proxy).text
json_data = json.loads(json_str)
key_stats = json_data["timeseries"]["result"][0]
if k not in key_stats:
# Yahoo website prints N/A, indicates Yahoo lacks necessary data to calculate
v = None
else:
# Select most recent (last) raw value in list:
v = key_stats[k][-1]["reportedValue"]["raw"]
self._info[k] = v

111
yfinance/scrapers/stats.py Normal file
View File

@@ -0,0 +1,111 @@
import datetime as _dt
import re as _re
import pandas as _pd
from yfinance import utils
from yfinance.data import TickerData
from pprint import pprint
class KeyStats:
def __init__(self, data: TickerData, proxy=None):
self._data = data
self.proxy = proxy
self._stats = None
self._valuations = None
self._already_scraped = False
@property
def stats(self) -> dict:
if self._stats is None:
self._scrape(self.proxy)
return self._stats
@property
def valuations(self) -> dict:
if self._valuations is None:
self._scrape(self.proxy)
return self._valuations
def _scrape(self, proxy):
if self._already_scraped:
return
self._already_scraped = True
data = self._data.get_json_data_stores('key-statistics', proxy)
self._stats = data['QuoteSummaryStore']
del self._stats["defaultKeyStatistics"] # available in Ticker.info
del self._stats["financialData"] # available in Ticker.info
exchange_tz = self._stats["quoteType"]["exchangeTimezoneName"]
try:
c = "calendarEvents"
for k in ["dividendDate", "exDividendDate"]:
self._stats[c][k] = _pd.to_datetime(self._stats[c][k], unit='s', utc=True)
if self._stats[c][k].time() == _dt.time(0):
# Probably not UTC but meant to be in exchange timezone
self._stats[c][k] = self._stats[c][k].tz_convert(None).tz_localize(exchange_tz)
except:
pass
ts = data['QuoteTimeSeriesStore']["timeSeries"]
trailing_series = []
year_series = []
for k in ts:
if len(ts[k]) == 0:
# Yahoo website prints N/A, indicates Yahoo lacks necessary data to calculate
continue
if len(ts[k]) == 1:
date = _pd.to_datetime(ts[k][0]["asOfDate"])
v = ts[k][0]["reportedValue"]
if isinstance(v, dict):
v = v["raw"]
k = _re.sub("^trailing", "", k)
trailing_series.append(_pd.Series([v], index=[date], name=k))
else:
if k == "timestamp":
continue
dates = [d["asOfDate"] for d in ts[k]]
dates = _pd.to_datetime(dates)
has_raw = isinstance(ts[k][0]["reportedValue"], dict) and "raw" in ts[k][0]["reportedValue"]
if has_raw:
values = [d["reportedValue"]["raw"] for d in ts[k]]
else:
values = [d["reportedValue"] for d in ts[k]]
k = _re.sub("^quarterly", "", k)
year_series.append(_pd.Series(values, index=dates, name=k))
year_table = None
if len(year_series) > 0:
year_table = _pd.concat(year_series, axis=1)
trailing_table = None
if len(trailing_series) > 0:
trailing_table = _pd.concat(trailing_series, axis=1)
tables = [t for t in [year_table, trailing_table] if not t is None]
if len(tables) == 0:
table = _pd.DataFrame()
else:
if len(tables) == 1:
table = tables[0]
else:
table = _pd.concat(tables, axis=0)
table = table.T
table = table[table.columns.sort_values(ascending=False)]
self._valuations = table

View File

@@ -137,6 +137,14 @@ class Ticker(TickerBase):
def info(self) -> dict:
return self.get_info()
@property
def stats(self) -> _pd.DataFrame:
return self.get_stats()
@property
def valuations(self) -> dict:
return self.get_valuations()
@property
def calendar(self) -> _pd.DataFrame:
return self.get_calendar()
@@ -155,19 +163,35 @@ class Ticker(TickerBase):
@property
def income_stmt(self) -> _pd.DataFrame:
return self.get_income_stmt()
return self.get_income_stmt(pretty=True)
@property
def quarterly_income_stmt(self) -> _pd.DataFrame:
return self.get_income_stmt(freq='quarterly')
return self.get_income_stmt(pretty=True, freq='quarterly')
@property
def incomestmt(self) -> _pd.DataFrame:
return self.income_stmt
@property
def quarterly_incomestmt(self) -> _pd.DataFrame:
return self.quarterly_income_stmt
@property
def financials(self) -> _pd.DataFrame:
return self.income_stmt
@property
def quarterly_financials(self) -> _pd.DataFrame:
return self.quarterly_income_stmt
@property
def balance_sheet(self) -> _pd.DataFrame:
return self.get_balance_sheet()
return self.get_balance_sheet(pretty=True)
@property
def quarterly_balance_sheet(self) -> _pd.DataFrame:
return self.get_balance_sheet(freq='quarterly')
return self.get_balance_sheet(pretty=True, freq='quarterly')
@property
def balancesheet(self) -> _pd.DataFrame:
@@ -177,13 +201,21 @@ class Ticker(TickerBase):
def quarterly_balancesheet(self) -> _pd.DataFrame:
return self.quarterly_balance_sheet
@property
def cash_flow(self) -> _pd.DataFrame:
return self.get_cash_flow(pretty=True, freq="yearly")
@property
def quarterly_cash_flow(self) -> _pd.DataFrame:
return self.get_cash_flow(pretty=True, freq='quarterly')
@property
def cashflow(self) -> _pd.DataFrame:
return self.get_cashflow(freq="yearly")
return self.cash_flow
@property
def quarterly_cashflow(self) -> _pd.DataFrame:
    """Spelling alias for quarterly_cash_flow.

    The duplicated, unreachable second return (diff-interleaving artifact)
    is removed; delegating keeps the pretty/freq arguments in one place.
    """
    return self.quarterly_cash_flow
@property
def recommendations_summary(self):
@@ -222,3 +254,7 @@ class Ticker(TickerBase):
@property
def earnings_forecasts(self) -> _pd.DataFrame:
    # Delegates to get_earnings_forecast() (note the singular/plural
    # mismatch between property and getter names).
    return self.get_earnings_forecast()
@property
def history_metadata(self) -> dict:
    # Delegates to get_history_metadata().
    return self.get_history_metadata()

View File

@@ -22,7 +22,8 @@
from __future__ import print_function
import datetime as _datetime
from typing import Dict, Union
import dateutil as _dateutil
from typing import Dict, Union, List, Optional
import pytz as _tz
import requests as _requests
@@ -216,7 +217,7 @@ def format_annual_financial_statement(level_detail, annual_dicts, annual_order,
else:
_statement = Annual
_statement.index = camel2title(_statement.T)
_statement.index = camel2title(_statement.T.index)
_statement['level_detail'] = level_detail
_statement = _statement.set_index([_statement.index, 'level_detail'])
_statement = _statement[sorted(_statement.columns, reverse=True)]
@@ -241,8 +242,50 @@ def format_quarterly_financial_statement(_statement, level_detail, order):
return _statement
def camel2title(o):
return [_re.sub("([a-z])([A-Z])", r"\g<1> \g<2>", i).title() for i in o]
def camel2title(strings: List[str], sep: str = ' ', acronyms: Optional[List[str]] = None) -> List[str]:
    """Convert an iterable of camelCase strings to Title Case.

    Parameters
    ----------
    strings : iterable of str
        camelCase strings to convert, e.g. ["totalRevenue"].
    sep : str, default ' '
        Single non-alphanumeric character inserted between words.
    acronyms : iterable of str, optional
        Upper-case substrings (e.g. "EBIT") kept intact instead of being
        title-cased. Assumes Yahoo returns acronym strings upper-case.

    Returns
    -------
    list of str
        Converted strings, e.g. ["Total Revenue"].

    Raises
    ------
    TypeError
        If 'strings' or 'acronyms' is not an iterable of str.
    ValueError
        If 'sep' is not a single regex-safe non-alphanumeric character, or
        an acronym is not entirely upper-case.
    """
    if isinstance(strings, str) or not hasattr(strings, '__iter__'):
        raise TypeError("camel2title() 'strings' argument must be iterable of strings")
    if len(strings) == 0:
        return strings
    if not isinstance(strings[0], str):
        raise TypeError("camel2title() 'strings' argument must be iterable of strings")
    if not isinstance(sep, str) or len(sep) != 1:
        raise ValueError(f"camel2title() 'sep' argument = '{sep}' must be single character")
    if _re.match("[a-zA-Z0-9]", sep):
        raise ValueError(f"camel2title() 'sep' argument = '{sep}' cannot be alpha-numeric")
    if _re.escape(sep) != sep and sep not in {' ', '-'}:
        # Space and hyphen are permitted even though re.escape() escapes them.
        raise ValueError(f"camel2title() 'sep' argument = '{sep}' cannot be special character")

    if acronyms is not None:
        # Validate before use (previously only acronyms[0] was checked,
        # so an empty list raised IndexError instead of being handled).
        if isinstance(acronyms, str) or not hasattr(acronyms, '__iter__'):
            raise TypeError("camel2title() 'acronyms' argument must be iterable of strings")
        for a in acronyms:
            if not isinstance(a, str):
                raise TypeError("camel2title() 'acronyms' argument must be iterable of strings")
            if not _re.match("^[A-Z]+$", a):
                raise ValueError(f"camel2title() 'acronyms' argument must only contain upper-case, but '{a}' detected")

    # Insert 'sep' at every lower-to-upper case transition.
    pat = "([a-z])([A-Z])"
    rep = rf"\g<1>{sep}\g<2>"

    if not acronyms:
        # Simple path: no acronyms to protect (covers None and empty list).
        return [_re.sub(pat, rep, s).title() for s in strings]

    strings = [_re.sub(pat, rep, s) for s in strings]
    # Also insert 'sep' after each acronym that is followed by a new word.
    for a in acronyms:
        strings = [_re.sub(f"({a})([A-Z][a-z])", rep, s) for s in strings]
    # Title-case every word that is not an acronym.
    known = set(acronyms)
    result = []
    for s in strings:
        words = [w if w in known else w.title() for w in s.split(sep)]
        result.append(sep.join(words))
    return result
def _parse_user_dt(dt, exchange_tz):
@@ -262,7 +305,17 @@ def _parse_user_dt(dt, exchange_tz):
return dt
def _interval_to_timedelta(interval):
    """Return the duration of a Yahoo interval string.

    Month-based intervals return a dateutil relativedelta (calendar-aware,
    months have no fixed length); everything else is parsed by
    pandas.Timedelta (e.g. "1d", "1h", "30m").
    """
    if interval == "1mo":
        # BUGFIX: dateutil.relativedelta is a *module*; calling it raised
        # TypeError. The class inside it must be instantiated.
        return _dateutil.relativedelta.relativedelta(months=1)
    elif interval == "3mo":
        # "3mo" is a valid Yahoo interval (see fix_Yahoo_returning_live_separate)
        # and pandas.Timedelta cannot parse month units.
        return _dateutil.relativedelta.relativedelta(months=3)
    elif interval == "1wk":
        # Redundant unit='d' kwarg removed: 'unit' only applies to a scalar
        # value argument, not to the 'days' keyword.
        return _pd.Timedelta(days=7)
    else:
        return _pd.Timedelta(interval)
def auto_adjust(data):
col_order = data.columns
df = data.copy()
ratio = df["Close"] / df["Adj Close"]
df["Adj Open"] = df["Open"] / ratio
@@ -278,13 +331,13 @@ def auto_adjust(data):
"Adj Low": "Low", "Adj Close": "Close"
}, inplace=True)
df = df[["Open", "High", "Low", "Close", "Volume"]]
return df[["Open", "High", "Low", "Close", "Volume"]]
return df[[c for c in col_order if c in df.columns]]
def back_adjust(data):
""" back-adjusted data to mimic true historical prices """
col_order = data.columns
df = data.copy()
ratio = df["Adj Close"] / df["Close"]
df["Adj Open"] = df["Open"] * ratio
@@ -300,7 +353,7 @@ def back_adjust(data):
"Adj Low": "Low"
}, inplace=True)
return df[["Open", "High", "Low", "Close", "Volume"]]
return df[[c for c in col_order if c in df.columns]]
def parse_quotes(data):
@@ -402,7 +455,7 @@ def fix_Yahoo_returning_live_separate(quotes, interval, tz_exchange):
elif interval == "3mo":
last_rows_same_interval = dt1.year == dt2.year and dt1.quarter == dt2.quarter
else:
last_rows_same_interval = False
last_rows_same_interval = (dt1-dt2) < _pd.Timedelta(interval)
if last_rows_same_interval:
# Last two rows are within same interval
@@ -481,7 +534,7 @@ def safe_merge_dfs(df_main, df_sub, interval):
new_index = None
if new_index is not None:
new_index = new_index.tz_localize(df.index.tz, ambiguous=True)
new_index = new_index.tz_localize(df.index.tz, ambiguous=True, nonexistent='shift_forward')
df_sub = _reindex_events(df_sub, new_index, data_col)
df = df_main.join(df_sub)
@@ -551,13 +604,15 @@ def safe_merge_dfs(df_main, df_sub, interval):
## Not always possible to match events with trading, e.g. when released pre-market.
## So have to append to bottom with nan prices.
## But should only be impossible with intra-day price data.
if interval.endswith('m') or interval.endswith('h'):
if interval.endswith('m') or interval.endswith('h') or interval == "1d":
# Update: is possible with daily data when dividend very recent
f_missing = ~df_sub.index.isin(df.index)
df_sub_missing = df_sub[f_missing]
df_sub_missing = df_sub[f_missing].copy()
keys = {"Adj Open", "Open", "Adj High", "High", "Adj Low", "Low", "Adj Close",
"Close"}.intersection(df.columns)
df_sub_missing[list(keys)] = _np.nan
df = _pd.concat([df, df_sub_missing], sort=True)
col_ordering = df.columns
df = _pd.concat([df, df_sub_missing], sort=True)[col_ordering]
else:
raise Exception("Lost data during merge despite all attempts to align data (see above)")
@@ -688,8 +743,10 @@ class _TzCache:
"""Simple sqlite file cache of ticker->timezone"""
def __init__(self):
    """Set up the cache folder and open the ticker->timezone store.

    The dead store `self._tz_db = None` (immediately overwritten by the
    eager _KVStore init below) is removed.
    """
    self._setup_cache_folder()
    # Must init db here, where is thread-safe
    self._tz_db = _KVStore(_os.path.join(self._db_dir, "tkr-tz.db"))
    self._migrate_cache_tkr_tz()
def _setup_cache_folder(self):
if not _os.path.isdir(self._db_dir):
@@ -721,11 +778,6 @@ class _TzCache:
@property
def tz_db(self):
    # The KVStore is created eagerly in __init__ (thread-safe there), so
    # the lazy-init fallback that previously lived here is redundant and
    # removed.
    return self._tz_db
def _migrate_cache_tkr_tz(self):

View File

@@ -1 +1 @@
# Package version string. The dead store "0.2.0rc1" (immediately
# overwritten) is removed; only the final value remains.
version = "0.2.3"