Compare commits

..

108 Commits

Author SHA1 Message Date
ValueRaider
dfffa6a551 Bump version to 0.1.96 2022-12-20 20:50:01 +00:00
ValueRaider
787b89c269 Merge pull request #1258 from ranaroussi/r0.1/fix/info-not-caching
Another info[] fix for #1256
2022-12-20 20:48:27 +00:00
ValueRaider
882f8b3367 Another info[] fix for #1256 2022-12-19 22:50:12 +00:00
ValueRaider
338a94a8f3 Bump version to 0.1.95 2022-12-19 21:46:58 +00:00
ValueRaider
e108a543fa Merge pull request #1257 from ranaroussi/r0.1/fix/quotes-html-parsing
Fix quotes html parsing when missing root.App.main
2022-12-19 21:43:42 +00:00
ValueRaider
071c3937b5 Add 'G7W.DU' to test 2022-12-19 16:48:10 +00:00
ValueRaider
a279d06810 Fix quotes html parsing when missing root.App.main 2022-12-19 16:18:43 +00:00
ValueRaider
80db9dfe3c Bump version to 0.1.94 2022-12-19 11:17:51 +00:00
ValueRaider
6bb23e05c2 Fix delisted ticker info[] 2022-12-19 11:16:45 +00:00
ValueRaider
edf2f69b62 Bump version to 0.1.93 2022-12-18 22:16:28 +00:00
ValueRaider
ce4c2e457d Fix Ticker.shares 2022-12-18 22:15:44 +00:00
ValueRaider
4fc15251a0 Bump version to 0.1.92 2022-12-18 21:45:07 +00:00
ValueRaider
c4600d6bd9 Fix setup.py 2022-12-18 21:44:30 +00:00
ValueRaider
14a839582d Bump version to 0.1.91 2022-12-18 21:39:08 +00:00
ValueRaider
3ae0434567 Merge pull request #1255 from ranaroussi/fix/decode-Yahoo-encryption
Backport Yahoo decryption
2022-12-18 21:37:26 +00:00
ValueRaider
f24dab2f26 Add 'cryptography' requirement 2022-12-18 21:35:07 +00:00
ValueRaider
b47adf0a90 Backport Yahoo decryption 2022-12-18 20:42:03 +00:00
ValueRaider
3537ec3e4b Bump version to 0.1.90 2022-12-13 15:36:36 +00:00
ValueRaider
6a306b0353 Merge pull request #1237 from ranaroussi/r0.1/fix/lxml
Restore lxml dep, set min ver = 4.9.1
2022-12-13 15:35:12 +00:00
ValueRaider
6e3282badb Restore lxml dep, set min ver = 4.9.1 2022-12-13 15:03:13 +00:00
ValueRaider
829683ca02 Bump version to 0.1.89 2022-12-12 22:06:28 +00:00
ValueRaider
3011cb324d Bump version to 0.1.88 2022-12-12 22:04:02 +00:00
ValueRaider
366cfc0795 Merge pull request #1231 from ranaroussi/r0.1/fix/reqs
Bump pandas to 1.3.0 ; Remove unused lxml
2022-12-10 18:17:51 +00:00
ValueRaider
cbd4b924b8 Bump pandas to 1.3.0 ; Remove unused lxml 2022-12-10 14:25:07 +00:00
ValueRaider
56759e3f3c Bump version to 0.1.87 2022-11-16 12:38:10 +00:00
ValueRaider
c193428b38 Merge pull request #1163 from ranaroussi/patch/threads-print-deadlock
Fix disable prints inside threads (bpython deadlock)
2022-11-16 12:36:48 +00:00
ValueRaider
a625d9e9c5 Merge pull request #1176 from ranaroussi/patch/dst-nonexistent
Fix localizing midnight when non-existent (DST)
2022-11-16 12:33:54 +00:00
ValueRaider
36e80a73f7 Fix localizing midnight when non-existent (DST) 2022-11-16 12:28:29 +00:00
ValueRaider
cdae1cf226 Bump version to 0.1.86 2022-11-14 12:49:43 +00:00
ValueRaider
bca569318e Merge pull request #1170 from ranaroussi/patch/default-start
Backport #1169 (default start)
2022-11-13 11:52:51 +00:00
ValueRaider
d11cd85a66 Backport #1169 (default start) 2022-11-13 11:51:41 +00:00
ValueRaider
2d32a6e204 Merge pull request #1162 from ranaroussi/patch/tz-csv-error
Fix corrupt tkr-tz-csv halting code
2022-11-10 21:51:10 +00:00
ValueRaider
bad6456a44 Fix disable prints inside threads (bpython deadlock) 2022-11-10 18:30:34 +00:00
ValueRaider
1687ae66ab Fix corrupt tkr-tz-csv halting code 2022-11-10 14:19:21 +00:00
ValueRaider
ddc34348d9 Merge pull request #1142 from ranaroussi/patch-0.1/delisted-tkr-errors
Improve handling delisted tickers
2022-11-03 22:56:16 +00:00
ValueRaider
1d74cfeb19 Merge pull request #1141 from ranaroussi/patch-0.1/trailing-peg-ratio
Move get 'trailingPegRatio' into _get_info(), simplify & optimise
2022-11-03 22:55:39 +00:00
ValueRaider
1589d07b56 Move get 'trailingPegRatio' into _get_info(), simplify & optimise 2022-11-03 22:53:04 +00:00
ValueRaider
d261237320 Improve handling delisted tickers 2022-11-03 22:49:12 +00:00
ValueRaider
66af3080dd Bump version to 0.1.85 2022-11-03 19:04:45 +00:00
ValueRaider
9d396b9559 Merge pull request #1135 from ranaroussi/patch/unknown-ticker-timezone
Backport ticker tz verification for nice error
2022-11-02 15:18:26 +00:00
ValueRaider
23b6ad12c1 Backport ticker tz verification for nice error 2022-10-31 21:14:50 +00:00
ValueRaider
22131e9fc7 Merge pull request #1124 from Jossan84/main
Bugfix: Get logo url when no website exists
2022-10-27 22:34:18 +01:00
ValueRaider
e99e61f95a Bump version to 0.1.84 2022-10-26 00:12:29 +01:00
ValueRaider
a3fe95ea27 Make tz-cache thread-safe 2022-10-26 00:09:23 +01:00
ValueRaider
000cb70bcb Bump version to 0.1.83 2022-10-25 23:23:32 +01:00
ValueRaider
c8d9d06e75 Expose _fetch_ticker_tz() arguments 2022-10-25 23:21:56 +01:00
ValueRaider
a5e07a0375 Bump version to 0.1.82 2022-10-25 23:15:48 +01:00
ValueRaider
a0a12bcf4c Backport _fetch_ticker_tz() 2022-10-25 23:07:48 +01:00
Ran Aroussi
c49cf626bb Update bug_report.md 2022-10-24 11:37:15 +01:00
Ran Aroussi
fa6f3fc537 Update bug_report.md 2022-10-24 11:37:03 +01:00
ValueRaider
34dfe944d9 Bump version to 0.1.81 2022-10-23 19:54:28 +01:00
ValueRaider
9619839bf5 Merge pull request #1108 from ranaroussi/hotfix/cache-on-read-only-system
Fix cache error on read only system
2022-10-23 19:52:41 +01:00
ValueRaider
90e00a71ca Fix missing 'return' 2022-10-23 19:51:09 +01:00
ValueRaider
f525ee2f5e Add README section on tz-cache ; Add set_tz_cache_location() 2022-10-23 19:47:22 +01:00
ValueRaider
ef12c8b600 Catch read-only exceptions during cache write 2022-10-23 19:29:54 +01:00
ValueRaider
42e6d0894e Bump version to 0.1.80 2022-10-22 13:25:59 +01:00
ValueRaider
de1c3c091b Merge pull request #1103 from ranaroussi/hotfix/download-timezones-patch
Fix download(ignore_tz=True) for single ticker
2022-10-22 13:15:52 +01:00
ValueRaider
c6c0fa3347 Fix download(ignore_tz=True) for single ticker 2022-10-22 13:14:35 +01:00
ValueRaider
75c823a72c Merge pull request #1101 from ranaroussi/hotfix/tz-dst-ambiguous
Fix tz-localize when DST-ambiguous
2022-10-21 15:29:52 +01:00
ValueRaider
f1ad8f0061 Fix tz-localize when DST-ambiguous 2022-10-21 12:43:50 +01:00
ValueRaider
b27cc0cf40 Update to 0.1.79 2022-10-18 20:07:40 +01:00
ValueRaider
1d7f8139d6 Fix when Yahoo returns price=NaNs on dividend day 2022-10-18 20:04:08 +01:00
ValueRaider
01ef1bb813 Update to 0.1.78 2022-10-18 12:53:22 +01:00
ValueRaider
1db6be75b8 Merge pull request #1093 from ranaroussi/fix/download-timezones
Add 'ignore_tz' arg to download()
2022-10-18 12:36:00 +01:00
ValueRaider
7902ec8667 Fix empty-df detection and date ordering 2022-10-18 12:31:51 +01:00
ValueRaider
ff42a3ac87 Add 'ignore_tz' arg to download() 2022-10-18 12:22:43 +01:00
ValueRaider
51f2c7301d Bump version again to 0.1.77 to skip bad tag 2022-10-07 22:04:16 +01:00
ValueRaider
632a16670a Bump version to 0.1.76 2022-10-07 21:55:15 +01:00
ValueRaider
fea0dca6f4 Merge pull request #1078 from ranaroussi/fix/info-access-when-unlisted
Fix/info access when unlisted
2022-10-07 21:44:44 +01:00
ValueRaider
c7e95152a0 Tidy code 2022-10-07 17:23:59 +01:00
ValueRaider
a52e972d04 Pretty error check for timezone retrieval 2022-10-07 17:20:58 +01:00
Ran Aroussi
a197d9f78e updated changelog 2022-10-04 12:09:19 +01:00
Ran Aroussi
dbb9bbfbf3 Updated to 0.1.75 2022-10-04 12:01:30 +01:00
ValueRaider
a7b053addd Merge pull request #1067 from ranaroussi/fix/deps
Add new dep 'appdirs' for recent tz-fixes PR
2022-10-01 13:52:10 +01:00
ValueRaider
e8ca256c10 Add new dep 'appdirs' for recent tz-fixes PR 2022-10-01 13:49:02 +01:00
ValueRaider
f651dd1e93 Merge pull request #1048 from yfinance-fork-team/fix/timezone
Fix timezone & datetime handling
2022-09-21 16:53:29 +01:00
ValueRaider
f40cf0aae1 Merge branch 'main' into fix/timezone 2022-09-20 23:28:17 +01:00
ValueRaider
200f57c458 Merge pull request #1033 from yfinance-fork-team/fix/yahoo-appending-latest-price
Bugfix: Relocate out-of-range check to before 15m->30m conversion
2022-09-20 20:24:13 +01:00
ValueRaider
e5d45eaa85 Merge pull request #1032 from yfinance-fork-team/feature/keep-na
Add 'keepna' argument
2022-09-20 14:17:34 +01:00
ValueRaider
42b77a9b54 Merge pull request #1042 from yfinance-fork-team/feature/init-perf-boost
Ticker.init() perf boost
2022-09-20 14:15:50 +01:00
Jose Manuel
42e5751705 Bugfix: Get logo url when no website exists 2022-09-19 13:54:56 +02:00
ValueRaider
bca005a2c0 Move user dt parsing to utils.py 2022-09-04 17:45:37 +01:00
ValueRaider
ca891bb187 Restore index to DatetimeIndex ; remove tz arg ; refactor for clean merge 2022-09-04 16:09:55 +01:00
Value Raider
0939ff3c78 Support user providing epoch ints 2022-08-15 16:04:46 +01:00
Value Raider
6f5c5635be Preemptive bugfix 2022-07-28 12:30:13 +01:00
Value Raider
809622e426 Ticker init does unnecessary work - move it out 2022-07-28 12:25:43 +01:00
Value Raider
eec1f3dbad Fix when Yahoo messes up DST 2022-07-27 21:54:12 +01:00
Value Raider
1de789ad72 Fix timestamp->dt 2022-07-15 00:00:08 +01:00
Value Raider
cd68ff68c6 Adjust prices-after-end-dt fix from > to >= 2022-07-14 23:45:18 +01:00
Value Raider
9673970f45 Relocate out-of-range check to before 15m->30m interpolation 2022-07-14 23:04:41 +01:00
Value Raider
6ea69a70ac Add 'keepna' argument 2022-07-14 14:41:24 +01:00
Value Raider
c723a5ab44 Preemptive perf. boost 2022-07-14 01:03:24 +01:00
Value Raider
50741d1409 Add tz to daily/weekly - bugfix 2022-07-12 23:04:04 +01:00
Value Raider
69d0dcd62b Add tz to daily/weekly 2022-07-12 22:54:01 +01:00
Value Raider
5c9348f255 end time should default=00:00 not end-of-day - fix comment 2022-07-12 21:30:29 +01:00
Value Raider
a472546e7b end time should default=00:00 not end-of-day 2022-07-12 21:30:29 +01:00
Value Raider
c914f1f183 Fix user-supplied TZ ; reduce Git diff 2022-07-12 21:30:29 +01:00
Value Raider
92c82342fe Fix PYTZ misuse 2022-07-12 21:30:29 +01:00
Value Raider
7ae08b04f3 Fix setting end dt to midnight 2022-07-12 21:30:29 +01:00
Value Raider
4b50f1e81c Perf boost: store ticker timezone in user file cache 2022-07-12 21:30:29 +01:00
Value Raider
1ed58be749 Perf boost: separate info fetch from financials, to get tz fast 2022-07-12 21:30:28 +01:00
Value Raider
375b4f9376 Fix error when adding Dividends/Splits to df 2022-07-12 21:27:58 +01:00
Value Raider
b6b4426ca9 User-provided 'tz' should 'convert' not 'replace' UTC 2022-07-12 21:27:58 +01:00
Value Raider
149ebe46db Remove debugging print stmt 2022-07-12 21:27:58 +01:00
Value Raider
d80b27cfde Fix Yahoo returning tz in data 2022-07-12 21:27:58 +01:00
Value Raider
36e277317b Fix date->datetime conversion 2022-07-12 21:27:58 +01:00
Value Raider
0e1ea4d2c6 Replace zoneinfo with pytz (installed by pandas) 2022-07-12 21:27:58 +01:00
Value Raider
2d96c383ef Set start/end timezone to exchange if None 2022-07-12 21:27:57 +01:00
12 changed files with 713 additions and 252 deletions

View File

@@ -14,4 +14,7 @@ Before posting an issue - please upgrade to the latest version and confirm the i
Upgrade using:
`$ pip install yfinance --upgrade --no-cache-dir`
Bug still there? Delete this content and submit your bug report here...
Bug still there? Delete this content and submit your bug report here and provide the following, as best you can:
- Simple code that reproduces your problem
- The error message

View File

@@ -1,6 +1,88 @@
Change Log
===========
0.1.96
------
- Fix info[] not caching #1258
0.1.95
------
- Fix info[] bug #1257
0.1.94
------
- Fix delisted ticker info[]
0.1.93
------
- Fix Ticker.shares
0.1.92
------
- Decrypt new Yahoo encryption #1255
0.1.90
------
- Restore lxml req, increase min ver #1237
0.1.89
------
- Remove unused incompatible dependency #1222
- Fix minimum Pandas version #1230
0.1.87
------
- Fix localizing midnight when non-existent (DST) #1176
- Fix thread deadlock in bpython #1163
0.1.86
------
- Fix 'trailingPegRatio' #1141
- Improve handling delisted tickers #1142
- Fix corrupt tkr-tz-csv halting code #1162
- Change default start to 1900-01-01 #1170
0.1.85
------
- Fix info['logo_url'] #1062
- Fix handling delisted ticker #1137
0.1.84
------
- Make tz-cache thread-safe
0.1.83
------
- Reduce spam-effect of tz-fetch
0.1.81
------
- Fix unhandled tz-cache exception #1107
0.1.80
------
- Fix `download(ignore_tz=True)` for single ticker #1097
- Fix rare case of error "Cannot infer DST time" #1100
0.1.79
------
- Fix when Yahoo returns price=NaNs on dividend day
0.1.78
------
- Fix download() when different timezones #1085
0.1.77
------
- Fix user experience bug #1078
0.1.75
------
- Fixed datetime-related issues: #1048
- Add 'keepna' argument #1032
- Speedup Ticker() creation #1042
- Improve a bugfix #1033
0.1.74
------
- Fixed bug introduced in 0.1.73 (sorry :/)

View File

@@ -48,8 +48,6 @@ Yahoo! finance API is intended for personal use only.**
The `Ticker` module, which allows you to access ticker data in a more Pythonic way:
Note: yahoo finance datetimes are received as UTC.
```python
import yfinance as yf
@@ -187,6 +185,11 @@ data = yf.download( # or pdr.get_data_yahoo(...
# (optional, default is '1d')
interval = "1m",
# Whether to ignore timezone when aligning ticker data from
# different timezones. Default is True. False may be useful for
# minute/hourly data.
ignore_tz = False,
# group by ticker (to access via data['SPY'])
# (optional, default is 'column')
group_by = 'ticker',
@@ -209,6 +212,18 @@ data = yf.download( # or pdr.get_data_yahoo(...
)
```
### Timezone cache store
When fetching price data, all dates are localized to the stock exchange's timezone.
But timezone retrieval is relatively slow, so yfinance attempts to cache them
in your user's cache folder.
You can direct the cache to use a different location with `set_tz_cache_location()`:
```python
import yfinance as yf
yf.set_tz_cache_location("custom/cache/location")
...
```
### Managing Multi-Level Columns
The following answer on Stack Overflow is for [How to deal with
@@ -259,11 +274,12 @@ To install `yfinance` using `conda`, see
### Requirements
- [Python](https://www.python.org) \>= 2.7, 3.4+
- [Pandas](https://github.com/pydata/pandas) (tested to work with
\>=0.23.1)
- [Numpy](http://www.numpy.org) \>= 1.11.1
- [requests](http://docs.python-requests.org/en/master/) \>= 2.14.2
- [lxml](https://pypi.org/project/lxml/) \>= 4.5.1
- [Pandas](https://github.com/pydata/pandas) \>= 1.3.0
- [Numpy](http://www.numpy.org) \>= 1.16.5
- [requests](http://docs.python-requests.org/en/master/) \>= 2.26
- [lxml](https://pypi.org/project/lxml/) \>= 4.9.1
- [appdirs](https://pypi.org/project/appdirs) \>= 1.4.4
- [cryptography](https://pypi.org/project/cryptography) \>=3.3.2
### Optional (if you want to use `pandas_datareader`)

View File

@@ -1,5 +1,5 @@
{% set name = "yfinance" %}
{% set version = "0.1.58" %}
{% set version = "0.1.96" %}
package:
name: "{{ name|lower }}"
@@ -16,20 +16,24 @@ build:
requirements:
host:
- pandas >=0.24.0
- pandas >=1.3.0
- numpy >=1.16.5
- requests >=2.21
- requests >=2.26
- multitasking >=0.0.7
- lxml >=4.5.1
- lxml >=4.9.1
- appdirs >= 1.4.4
- cryptography >= 3.3.2
- pip
- python
run:
- pandas >=0.24.0
- pandas >=1.3.0
- numpy >=1.16.5
- requests >=2.21
- requests >=2.26
- multitasking >=0.0.7
- lxml >=4.5.1
- lxml >=4.9.1
- appdirs >= 1.4.4
- cryptography >= 3.3.2
- python
test:

View File

@@ -1,5 +1,7 @@
pandas>=0.24.0
pandas>=1.3.0
numpy>=1.16.5
requests>=2.26
multitasking>=0.0.7
lxml>=4.5.1
lxml>=4.9.1
appdirs>=1.4.4
cryptography>=3.3.2

View File

@@ -61,9 +61,10 @@ setup(
platforms=['any'],
keywords='pandas, yahoo finance, pandas datareader',
packages=find_packages(exclude=['contrib', 'docs', 'tests', 'examples']),
install_requires=['pandas>=0.24.0', 'numpy>=1.15',
install_requires=['pandas>=1.3.0', 'numpy>=1.16.5',
'requests>=2.26', 'multitasking>=0.0.7',
'lxml>=4.5.1'],
'lxml>=4.9.1', 'appdirs>=1.4.4',
'cryptography>=3.3.2'],
entry_points={
'console_scripts': [
'sample=sample:main',

View File

@@ -15,21 +15,90 @@ Sanity check for most common library uses all working
import yfinance as yf
import unittest
import datetime
symbols = ['MSFT', 'IWO', 'VFINX', '^GSPC', 'BTC-USD']
tickers = [yf.Ticker(symbol) for symbol in symbols]
session = None
import requests_cache ; session = requests_cache.CachedSession("yfinance.cache", expire_after=24*60*60)
# Good symbols = all attributes should work
good_symbols = ['MSFT', 'IWO', 'VFINX', '^GSPC', 'BTC-USD']
good_tickers = [yf.Ticker(symbol, session=session) for symbol in good_symbols]
# Dodgy symbols = Yahoo data incomplete, so exclude from some tests
dodgy_symbols = ["G7W.DU"]
dodgy_tickers = [yf.Ticker(symbol, session=session) for symbol in dodgy_symbols]
symbols = good_symbols + dodgy_symbols
tickers = good_tickers + dodgy_tickers
# Delisted = no data expected but yfinance shouldn't raise exception
delisted_symbols = ["BRK.B", "SDLP"]
delisted_tickers = [yf.Ticker(symbol, session=session) for symbol in delisted_symbols]
class TestTicker(unittest.TestCase):
def setUp(self):
d_today = datetime.date.today()
d_today -= datetime.timedelta(days=30)
self.start_d = datetime.date(d_today.year, d_today.month, 1)
def test_info_history(self):
# always should have info and history for valid symbols
for ticker in tickers:
# always should have info and history for valid symbols
assert(ticker.info is not None and ticker.info != {})
history = ticker.history(period="max")
history = ticker.history(period="1mo")
assert(history.empty is False and history is not None)
histories = yf.download(symbols, period="1mo", session=session)
assert(histories.empty is False and histories is not None)
for ticker in tickers:
assert(ticker.info is not None and ticker.info != {})
history = ticker.history(start=self.start_d)
assert(history.empty is False and history is not None)
histories = yf.download(symbols, start=self.start_d, session=session)
assert(histories.empty is False and histories is not None)
def test_info_history_nofail(self):
# should not throw Exception for delisted tickers, just print a message
for ticker in delisted_tickers:
history = ticker.history(period="1mo")
histories = yf.download(delisted_symbols, period="1mo", session=session)
histories = yf.download(delisted_symbols[0], period="1mo", session=session)
histories = yf.download(delisted_symbols[1], period="1mo")#, session=session)
for ticker in delisted_tickers:
history = ticker.history(start=self.start_d)
histories = yf.download(delisted_symbols, start=self.start_d, session=session)
histories = yf.download(delisted_symbols[0], start=self.start_d, session=session)
histories = yf.download(delisted_symbols[1], start=self.start_d, session=session)
def test_attributes(self):
for ticker in tickers:
ticker.isin
ticker.major_holders
ticker.institutional_holders
ticker.mutualfund_holders
ticker.dividends
ticker.splits
ticker.actions
ticker.info
ticker.info["trailingPegRatio"]
ticker.calendar
ticker.recommendations
ticker.earnings
ticker.quarterly_earnings
ticker.financials
ticker.quarterly_financials
ticker.balance_sheet
ticker.quarterly_balance_sheet
ticker.cashflow
ticker.quarterly_cashflow
ticker.sustainability
ticker.options
ticker.news
ticker.shares
ticker.earnings_history
ticker.earnings_dates
def test_attributes_nofail(self):
# should not throw Exception for delisted tickers, just print a message
for ticker in delisted_tickers:
ticker.isin
ticker.major_holders
ticker.institutional_holders
@@ -56,8 +125,7 @@ class TestTicker(unittest.TestCase):
ticker.earnings_dates
def test_holders(self):
for ticker in tickers:
assert(ticker.info is not None and ticker.info != {})
for ticker in good_tickers:
assert(ticker.major_holders is not None)
assert(ticker.institutional_holders is not None)

View File

@@ -23,6 +23,7 @@ from . import version
from .ticker import Ticker
from .tickers import Tickers
from .multi import download
from .utils import set_tz_cache_location
__version__ = version.version
__author__ = "Ran Aroussi"
@@ -42,4 +43,4 @@ def pdr_override():
pass
__all__ = ['download', 'Ticker', 'Tickers', 'pdr_override']
__all__ = ['download', 'Ticker', 'Tickers', 'pdr_override', 'set_tz_cache_location']

View File

@@ -23,6 +23,7 @@ from __future__ import print_function
import time as _time
import datetime as _datetime
import pytz as _tz
import requests as _requests
import pandas as _pd
import numpy as _np
@@ -53,6 +54,7 @@ class TickerBase():
self._history = None
self._base_url = _BASE_URL_
self._scrape_url = _SCRAPE_URL_
self._tz = None
self._fundamentals = False
self._info = None
@@ -71,18 +73,10 @@ class TickerBase():
self._earnings_dates = None
self._earnings_history = None
self._earnings = {
"yearly": utils.empty_df(),
"quarterly": utils.empty_df()}
self._financials = {
"yearly": utils.empty_df(),
"quarterly": utils.empty_df()}
self._balancesheet = {
"yearly": utils.empty_df(),
"quarterly": utils.empty_df()}
self._cashflow = {
"yearly": utils.empty_df(),
"quarterly": utils.empty_df()}
self._earnings = None
self._financials = None
self._balancesheet = None
self._cashflow = None
# accept isin as ticker
if utils.is_isin(self.ticker):
@@ -106,8 +100,8 @@ class TickerBase():
def history(self, period="1mo", interval="1d",
start=None, end=None, prepost=False, actions=True,
auto_adjust=True, back_adjust=False,
proxy=None, rounding=False, tz=None, timeout=None, **kwargs):
auto_adjust=True, back_adjust=False, keepna=False,
proxy=None, rounding=False, timeout=None, **kwargs):
"""
:Parameters:
period : str
@@ -129,14 +123,14 @@ class TickerBase():
Adjust all OHLC automatically? Default is True
back_adjust: bool
Back-adjusted data to mimic true historical prices
keepna: bool
Keep NaN rows returned by Yahoo?
Default is False
proxy: str
Optional. Proxy server URL scheme. Default is None
rounding: bool
Round values to 2 decimal places?
Optional. Default is False = precision suggested by Yahoo!
tz: str
Optional timezone locale for dates.
(default data is returned as non-localized dates)
timeout: None or float
If not None stops waiting for a response after given number of
seconds. (Can also be a fraction of a second e.g. 0.01)
@@ -147,23 +141,39 @@ class TickerBase():
error message printing to console.
"""
# Work with errors
debug_mode = True
if "debug" in kwargs and isinstance(kwargs["debug"], bool):
debug_mode = kwargs["debug"]
if "many" in kwargs and kwargs["many"]:
# Disable prints with threads, it deadlocks/throws
debug_mode = False
err_msg = "No data found for this date range, symbol may be delisted"
if start or period is None or period.lower() == "max":
# Check can get TZ. Fail => probably delisted
tz = self._get_ticker_tz(debug_mode, proxy, timeout)
if tz is None:
# Every valid ticker has a timezone. Missing = problem
shared._DFS[self.ticker] = utils.empty_df()
shared._ERRORS[self.ticker] = err_msg
if debug_mode:
print('- %s: %s' % (self.ticker, err_msg))
return utils.empty_df()
if end is None:
end = int(_time.time())
elif isinstance(end, _datetime.datetime):
end = int(_time.mktime(end.timetuple()))
else:
end = int(_time.mktime(_time.strptime(str(end), '%Y-%m-%d')))
end = utils._parse_user_dt(end, tz)
if start is None:
if interval == "1m":
start = end - 604800 # Subtract 7 days
else:
start = -631159200
elif isinstance(start, _datetime.datetime):
start = int(_time.mktime(start.timetuple()))
#time stamp of 01/01/1900
start = -2208994789
else:
start = int(_time.mktime(
_time.strptime(str(start), '%Y-%m-%d')))
start = utils._parse_user_dt(start, tz)
params = {"period1": start, "period2": end}
else:
period = period.lower()
@@ -206,17 +216,10 @@ class TickerBase():
except Exception:
pass
# Work with errors
debug_mode = True
if "debug" in kwargs and isinstance(kwargs["debug"], bool):
debug_mode = kwargs["debug"]
err_msg = "No data found for this date range, symbol may be delisted"
if data is None or not type(data) is dict or 'status_code' in data.keys():
shared._DFS[self.ticker] = utils.empty_df()
shared._ERRORS[self.ticker] = err_msg
if "many" not in kwargs and debug_mode:
if debug_mode:
print('- %s: %s' % (self.ticker, err_msg))
return utils.empty_df()
@@ -224,7 +227,7 @@ class TickerBase():
err_msg = data["chart"]["error"]["description"]
shared._DFS[self.ticker] = utils.empty_df()
shared._ERRORS[self.ticker] = err_msg
if "many" not in kwargs and debug_mode:
if debug_mode:
print('- %s: %s' % (self.ticker, err_msg))
return shared._DFS[self.ticker]
@@ -232,17 +235,22 @@ class TickerBase():
not data["chart"]["result"]:
shared._DFS[self.ticker] = utils.empty_df()
shared._ERRORS[self.ticker] = err_msg
if "many" not in kwargs and debug_mode:
if debug_mode:
print('- %s: %s' % (self.ticker, err_msg))
return shared._DFS[self.ticker]
# parse quotes
try:
quotes = utils.parse_quotes(data["chart"]["result"][0], tz)
quotes = utils.parse_quotes(data["chart"]["result"][0])
# Yahoo bug fix - it often appends latest price even if after end date
if end and not quotes.empty:
endDt = _pd.to_datetime(_datetime.datetime.utcfromtimestamp(end))
if quotes.index[quotes.shape[0]-1] >= endDt:
quotes = quotes.iloc[0:quotes.shape[0]-1]
except Exception:
shared._DFS[self.ticker] = utils.empty_df()
shared._ERRORS[self.ticker] = err_msg
if "many" not in kwargs and debug_mode:
if debug_mode:
print('- %s: %s' % (self.ticker, err_msg))
return shared._DFS[self.ticker]
@@ -278,7 +286,7 @@ class TickerBase():
err_msg = "back_adjust failed with %s" % e
shared._DFS[self.ticker] = utils.empty_df()
shared._ERRORS[self.ticker] = err_msg
if "many" not in kwargs and debug_mode:
if debug_mode:
print('- %s: %s' % (self.ticker, err_msg))
if rounding:
@@ -286,16 +294,10 @@ class TickerBase():
"chart"]["result"][0]["meta"]["priceHint"])
quotes['Volume'] = quotes['Volume'].fillna(0).astype(_np.int64)
quotes.dropna(inplace=True)
# actions
dividends, splits = utils.parse_actions(data["chart"]["result"][0], tz)
dividends, splits = utils.parse_actions(data["chart"]["result"][0])
# Yahoo bug fix - it often appends latest price even if after end date
if end and not quotes.empty:
endDt = _pd.to_datetime(_datetime.datetime.fromtimestamp(end))
if quotes.index[quotes.shape[0]-1] > endDt:
quotes = quotes.iloc[0:quotes.shape[0]-1]
tz_exchange = data["chart"]["result"][0]["meta"]["exchangeTimezoneName"]
# combine
df = _pd.concat([quotes, dividends, splits], axis=1, sort=True)
@@ -303,32 +305,245 @@ class TickerBase():
df["Stock Splits"].fillna(0, inplace=True)
# index eod/intraday
df.index = df.index.tz_localize("UTC").tz_convert(
data["chart"]["result"][0]["meta"]["exchangeTimezoneName"])
df.index = df.index.tz_localize("UTC").tz_convert(tz_exchange)
df = utils.fix_Yahoo_dst_issue(df, params["interval"])
if params["interval"][-1] == "m":
df.index.name = "Datetime"
elif params["interval"] == "1h":
pass
else:
df.index = _pd.to_datetime(df.index.date)
if tz is not None:
df.index = df.index.tz_localize(tz)
# If a midnight is during DST transition hour when clocks roll back,
# meaning clock hits midnight twice, then use the 2nd (ambiguous=True)
df.index = _pd.to_datetime(df.index.date).tz_localize(tz_exchange, ambiguous=True, nonexistent='shift_forward')
df.index.name = "Date"
# duplicates and missing rows cleanup
df.dropna(how='all', inplace=True)
df = df[~df.index.duplicated(keep='first')]
self._history = df.copy()
if not actions:
df.drop(columns=["Dividends", "Stock Splits"], inplace=True)
df = df.drop(columns=["Dividends", "Stock Splits"])
if not keepna:
mask_nan_or_zero = (df.isna()|(df==0)).all(axis=1)
df = df.drop(mask_nan_or_zero.index[mask_nan_or_zero])
return df
# ------------------------
def _get_ticker_tz(self, debug_mode, proxy, timeout):
    """Return the exchange timezone name for this ticker, or None.

    Lookup order: the value already stored on this instance, then the
    timezone cache (utils.cache_lookup_tkr_tz), then a fresh fetch through
    self._fetch_ticker_tz(). A freshly fetched value is written back to the
    cache and the final result is stored on the instance.

    :Parameters:
        debug_mode: bool
            Forwarded to _fetch_ticker_tz() to control error printing.
        proxy: str or dict
            Forwarded to _fetch_ticker_tz().
        timeout: None or float
            Forwarded to _fetch_ticker_tz().
    """
    if self._tz is not None:
        return self._tz

    tkr_tz = utils.cache_lookup_tkr_tz(self.ticker)

    if tkr_tz is not None:
        # A cached value is only usable when it is a string that pytz
        # recognises. Anything else is treated as a corrupt cache entry.
        valid_value = isinstance(tkr_tz, str)
        if valid_value:
            try:
                _tz.timezone(tkr_tz)
            except Exception:
                valid_value = False
        if not valid_value:
            # Clear from cache and force re-fetch
            utils.cache_store_tkr_tz(self.ticker, None)
            tkr_tz = None

    if tkr_tz is None:
        tkr_tz = self._fetch_ticker_tz(debug_mode, proxy, timeout)

        if tkr_tz is not None:
            try:
                utils.cache_store_tkr_tz(self.ticker, tkr_tz)
            except PermissionError:
                # System probably read-only, so cannot cache
                pass

    self._tz = tkr_tz
    return tkr_tz
def _fetch_ticker_tz(self, debug_mode, proxy, timeout):
    """Ask Yahoo's chart endpoint for this ticker's exchange timezone name.

    Requests one day of daily price data only to read
    meta.exchangeTimezoneName from the response. Returns that value, or
    None when the request fails, Yahoo reports an error, or the field is
    missing. Problems are printed only when debug_mode is true.

    :Parameters:
        debug_mode: bool
            Print a diagnostic message on failure?
        proxy: str or dict
            Optional. Proxy server URL, or a dict holding it under "https".
        timeout: None or float
            Passed through to the HTTP request.
    """
    query = {"range": "1d", "interval": "1d"}

    # setup proxy in requests format
    if proxy is not None:
        if isinstance(proxy, dict) and "https" in proxy:
            proxy = proxy["https"]
        proxy = {"https": proxy}

    chart_url = "{}/v8/finance/chart/{}".format(self._base_url, self.ticker)
    http = self.session or _requests

    # Any failure while requesting or decoding the JSON ends the lookup.
    try:
        response = http.get(url=chart_url, params=query, proxies=proxy, headers=utils.user_agent_headers, timeout=timeout)
        payload = response.json()
    except Exception as e:
        if debug_mode:
            print("Failed to get ticker '{}' reason: {}".format(self.ticker, e))
        return None

    api_error = payload.get('chart', {}).get('error', None)
    if api_error:
        # explicit error from yahoo API
        if debug_mode:
            print("Got error from yahoo api for ticker {}, Error: {}".format(self.ticker, api_error))
        return None

    try:
        return payload["chart"]["result"][0]["meta"]["exchangeTimezoneName"]
    except Exception as err:
        if debug_mode:
            print("Could not get exchangeTimezoneName for ticker '{}' reason: {}".format(self.ticker, err))
            print("Got response: ")
            print("-------------")
            print(" {}".format(payload))
            print("-------------")
    return None
def _get_info(self, proxy=None):
    """Scrape the ticker's quote page and populate the info-related fields.

    Sets self._sustainability, self._info and self._recommendations, and
    self._calendar when calendar data can be parsed. Returns immediately
    when _info, _sustainability and _recommendations are all already set.
    Each section is best-effort: a section that cannot be parsed leaves an
    empty or default value rather than raising.

    :Parameters:
        proxy: str or dict
            Optional. Proxy server URL, or a dict holding it under "https".
            Default is None
    """
    # setup proxy in requests format
    if proxy is not None:
        if isinstance(proxy, dict) and "https" in proxy:
            proxy = proxy["https"]
        proxy = {"https": proxy}

    already_fetched = (self._info is not None
                       and self._sustainability is not None
                       and self._recommendations is not None)
    if already_fetched:
        return

    ticker_url = "{}/{}".format(self._scrape_url, self.ticker)

    # get info and sustainability
    data = utils.get_json(ticker_url, proxy, self.session)

    # sustainability
    d = {}
    try:
        if isinstance(data.get('esgScores'), dict):
            for item in data['esgScores']:
                if not isinstance(data['esgScores'][item], (dict, list)):
                    d[item] = data['esgScores'][item]

            s = _pd.DataFrame(index=[0], data=d)[-1:].T
            s.columns = ['Value']
            s.index.name = '%.f-%.f' % (
                s[s.index == 'ratingYear']['Value'].values[0],
                s[s.index == 'ratingMonth']['Value'].values[0])

            self._sustainability = s[~s.index.isin(
                ['maxAge', 'ratingYear', 'ratingMonth'])]
        else:
            self._sustainability = utils.empty_df()
    except Exception:
        self._sustainability = utils.empty_df()

    # info (be nice to python 2)
    self._info = {}
    try:
        items = ['summaryProfile', 'financialData', 'quoteType',
                 'defaultKeyStatistics', 'assetProfile', 'summaryDetail']
        for item in items:
            if isinstance(data.get(item), dict):
                self._info.update(data[item])
    except Exception:
        pass

    # For ETFs, provide this valuable data: the top holdings of the ETF
    try:
        if 'topHoldings' in data:
            self._info.update(data['topHoldings'])
    except Exception:
        pass

    try:
        if not isinstance(data.get('summaryDetail'), dict):
            # For some reason summaryDetail did not give any results. The price dict usually has most of the same info
            self._info.update(data.get('price', {}))
    except Exception:
        pass

    try:
        # self._info['regularMarketPrice'] = self._info['regularMarketOpen']
        self._info['regularMarketPrice'] = data.get('price', {}).get(
            'regularMarketPrice', self._info.get('regularMarketOpen', None))
    except Exception:
        pass

    try:
        self._info['preMarketPrice'] = data.get('price', {}).get(
            'preMarketPrice', self._info.get('preMarketPrice', None))
    except Exception:
        pass

    self._info['logo_url'] = ""
    try:
        if 'website' not in self._info:
            # No website listed: guess a domain from the first word of the short name
            self._info['logo_url'] = 'https://logo.clearbit.com/%s.com' % self._info['shortName'].split(' ')[0].split(',')[0]
        else:
            domain = self._info['website'].split(
                '://')[1].split('/')[0].replace('www.', '')
            self._info['logo_url'] = 'https://logo.clearbit.com/%s' % domain
    except Exception:
        pass

    # events
    try:
        cal = _pd.DataFrame(
            data['calendarEvents']['earnings'])
        cal['earningsDate'] = _pd.to_datetime(
            cal['earningsDate'], unit='s')
        self._calendar = cal.T
        self._calendar.index = utils.camel2title(self._calendar.index)
        self._calendar.columns = ['Value']
    except Exception:
        pass

    # analyst recommendations
    try:
        rec = _pd.DataFrame(
            data['upgradeDowngradeHistory']['history'])
        rec['earningsDate'] = _pd.to_datetime(
            rec['epochGradeDate'], unit='s')
        rec.set_index('earningsDate', inplace=True)
        rec.index.name = 'Date'
        rec.columns = utils.camel2title(rec.columns)
        self._recommendations = rec[[
            'Firm', 'To Grade', 'From Grade', 'Action']].sort_index()
    except Exception:
        self._recommendations = utils.empty_df()

    # Complementary key-statistics. For now just want 'trailing PEG ratio'
    session = self.session or _requests
    keys = {"trailingPegRatio"}
    if keys:
        # For just one/few variable is faster to query directly:
        url = "https://query1.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{}?symbol={}".format(self.ticker, self.ticker)
        for k in keys:
            url += "&type=" + k
        # Request 6 months of data
        url += "&period1={}".format(int((_datetime.datetime.now() - _datetime.timedelta(days=365 // 2)).timestamp()))
        url += "&period2={}".format(int((_datetime.datetime.now() + _datetime.timedelta(days=1)).timestamp()))

        json_str = session.get(url=url, proxies=proxy, headers=utils.user_agent_headers).text
        json_data = _json.loads(json_str)

        # Store every requested key explicitly instead of relying on the
        # loop variable left over from building the URL.
        # NOTE(review): only the first "result" entry is consulted, as
        # before - confirm the response layout before adding more keys.
        for k in keys:
            try:
                key_stats = json_data["timeseries"]["result"][0]
                if k not in key_stats:
                    # Yahoo website prints N/A, indicates Yahoo lacks necessary data to calculate
                    v = None
                else:
                    # Select most recent (last) raw value in list:
                    v = key_stats[k][-1]["reportedValue"]["raw"]
            except (KeyError, IndexError, TypeError):
                # Malformed/empty response: record the key as unavailable
                # so info[] always contains it, since the early-return
                # guard above prevents a later retry.
                v = None
            self._info[k] = v
def _get_fundamentals(self, proxy=None):
def cleanup(data):
df = _pd.DataFrame(data).drop(columns=['maxAge'])
@@ -362,9 +577,6 @@ class TickerBase():
ticker_url = "{}/{}".format(self._scrape_url, self.ticker)
# get info and sustainability
data = utils.get_json(ticker_url, proxy, self.session)
# holders
try:
resp = utils.get_html(ticker_url + '/holders', proxy, self.session)
@@ -401,101 +613,16 @@ class TickerBase():
self._mutualfund_holders['% Out'] = self._mutualfund_holders[
'% Out'].str.replace('%', '').astype(float) / 100
# sustainability
d = {}
try:
if isinstance(data.get('esgScores'), dict):
for item in data['esgScores']:
if not isinstance(data['esgScores'][item], (dict, list)):
d[item] = data['esgScores'][item]
s = _pd.DataFrame(index=[0], data=d)[-1:].T
s.columns = ['Value']
s.index.name = '%.f-%.f' % (
s[s.index == 'ratingYear']['Value'].values[0],
s[s.index == 'ratingMonth']['Value'].values[0])
self._sustainability = s[~s.index.isin(
['maxAge', 'ratingYear', 'ratingMonth'])]
except Exception:
pass
# info (be nice to python 2)
self._info = {}
try:
items = ['summaryProfile', 'financialData', 'quoteType',
'defaultKeyStatistics', 'assetProfile', 'summaryDetail']
for item in items:
if isinstance(data.get(item), dict):
self._info.update(data[item])
except Exception:
pass
# For ETFs, provide this valuable data: the top holdings of the ETF
try:
if 'topHoldings' in data:
self._info.update(data['topHoldings'])
except Exception:
pass
try:
if not isinstance(data.get('summaryDetail'), dict):
# For some reason summaryDetail did not give any results. The price dict usually has most of the same info
self._info.update(data.get('price', {}))
except Exception:
pass
try:
# self._info['regularMarketPrice'] = self._info['regularMarketOpen']
self._info['regularMarketPrice'] = data.get('price', {}).get(
'regularMarketPrice', self._info.get('regularMarketOpen', None))
except Exception:
pass
try:
self._info['preMarketPrice'] = data.get('price', {}).get(
'preMarketPrice', self._info.get('preMarketPrice', None))
except Exception:
pass
self._info['logo_url'] = ""
try:
domain = self._info['website'].split(
'://')[1].split('/')[0].replace('www.', '')
self._info['logo_url'] = 'https://logo.clearbit.com/%s' % domain
except Exception:
pass
# events
try:
cal = _pd.DataFrame(
data['calendarEvents']['earnings'])
cal['earningsDate'] = _pd.to_datetime(
cal['earningsDate'], unit='s')
self._calendar = cal.T
self._calendar.index = utils.camel2title(self._calendar.index)
self._calendar.columns = ['Value']
except Exception:
pass
# analyst recommendations
try:
rec = _pd.DataFrame(
data['upgradeDowngradeHistory']['history'])
rec['earningsDate'] = _pd.to_datetime(
rec['epochGradeDate'], unit='s')
rec.set_index('earningsDate', inplace=True)
rec.index.name = 'Date'
rec.columns = utils.camel2title(rec.columns)
self._recommendations = rec[[
'Firm', 'To Grade', 'From Grade', 'Action']].sort_index()
except Exception:
pass
self._get_info(proxy)
# get fundamentals
data = utils.get_json(ticker_url + '/financials', proxy, self.session)
# generic patterns
self._earnings = {"yearly": utils.empty_df(), "quarterly": utils.empty_df()}
self._cashflow = {"yearly": utils.empty_df(), "quarterly": utils.empty_df()}
self._balancesheet = {"yearly": utils.empty_df(), "quarterly": utils.empty_df()}
self._financials = {"yearly": utils.empty_df(), "quarterly": utils.empty_df()}
for key in (
(self._cashflow, 'cashflowStatement', 'cashflowStatements'),
(self._balancesheet, 'balanceSheet', 'balanceSheetStatements'),
@@ -576,59 +703,17 @@ class TickerBase():
except Exception:
pass
# Complementary key-statistics (currently fetching the important trailingPegRatio which is the value shown in the website)
res = {}
try:
my_headers = {'user-agent': 'curl/7.55.1', 'accept': 'application/json', 'content-type': 'application/json',
'referer': 'https://finance.yahoo.com/', 'cache-control': 'no-cache', 'connection': 'close'}
p = _re.compile(r'root\.App\.main = (.*);')
r = _requests.session().get('https://finance.yahoo.com/quote/{}/key-statistics?p={}'.format(self.ticker,
self.ticker), headers=my_headers)
q_results = {}
my_qs_keys = ['pegRatio'] # QuoteSummaryStore
# , 'quarterlyPegRatio'] # QuoteTimeSeriesStore
my_ts_keys = ['trailingPegRatio']
# Complementary key-statistics
data = _json.loads(p.findall(r.text)[0])
key_stats = data['context']['dispatcher']['stores']['QuoteTimeSeriesStore']
q_results.setdefault(self.ticker, [])
for i in my_ts_keys:
# j=0
try:
# res = {i: key_stats['timeSeries'][i][1]['reportedValue']['raw']}
# We need to loop over multiple items, if they exist: 0,1,2,..
zzz = key_stats['timeSeries'][i]
for j in range(len(zzz)):
if key_stats['timeSeries'][i][j]:
res = {i: key_stats['timeSeries']
[i][j]['reportedValue']['raw']}
q_results[self.ticker].append(res)
# print(res)
# q_results[ticker].append(res)
except:
q_results[ticker].append({i: np.nan})
res = {'Company': ticker}
q_results[ticker].append(res)
except Exception:
pass
if 'trailingPegRatio' in res:
self._info['trailingPegRatio'] = res['trailingPegRatio']
self._fundamentals = True
def get_recommendations(self, proxy=None, as_dict=False, *args, **kwargs):
self._get_fundamentals(proxy=proxy)
self._get_info(proxy)
data = self._recommendations
if as_dict:
return data.to_dict()
return data
def get_calendar(self, proxy=None, as_dict=False, *args, **kwargs):
self._get_fundamentals(proxy=proxy)
self._get_info(proxy)
data = self._calendar
if as_dict:
return data.to_dict()
@@ -658,14 +743,14 @@ class TickerBase():
return data
def get_info(self, proxy=None, as_dict=False, *args, **kwargs):
self._get_fundamentals(proxy=proxy)
self._get_info(proxy)
data = self._info
if as_dict:
return data.to_dict()
return data
def get_sustainability(self, proxy=None, as_dict=False, *args, **kwargs):
self._get_fundamentals(proxy=proxy)
self._get_info(proxy)
data = self._sustainability
if as_dict:
return data.to_dict()
@@ -763,6 +848,10 @@ class TickerBase():
self.get_info(proxy=proxy)
if "shortName" in self._info:
q = self._info['shortName']
if q is None:
err_msg = "Cannot map to ISIN code, symbol may be delisted"
print('- %s: %s' % (self.ticker, err_msg))
return None
url = 'https://markets.businessinsider.com/ajax/' \
'SearchController_Suggest?max_results=25&query=%s' \
@@ -861,8 +950,10 @@ class TickerBase():
dates = _pd.concat([dates, data], axis=0)
page_offset += page_size
if dates is None:
raise Exception("No data found, symbol may be delisted")
if (dates is None) or dates.shape[0]==0:
err_msg = "No earnings dates found, symbol may be delisted"
print('- %s: %s' % (self.ticker, err_msg))
return None
dates = dates.reset_index(drop=True)
# Drop redundant columns

View File

@@ -29,8 +29,8 @@ from . import Ticker, utils
from . import shared
def download(tickers, start=None, end=None, actions=False, threads=True,
group_by='column', auto_adjust=False, back_adjust=False,
def download(tickers, start=None, end=None, actions=False, threads=True, ignore_tz=True,
group_by='column', auto_adjust=False, back_adjust=False, keepna=False,
progress=True, period="max", show_errors=True, interval="1d", prepost=False,
proxy=None, rounding=False, timeout=None, **kwargs):
"""Download yahoo tickers
@@ -56,10 +56,16 @@ def download(tickers, start=None, end=None, actions=False, threads=True,
Default is False
auto_adjust: bool
Adjust all OHLC automatically? Default is False
keepna: bool
Keep NaN rows returned by Yahoo?
Default is False
actions: bool
Download dividend + stock splits data. Default is False
threads: bool / int
How many threads to use for mass downloading. Default is True
ignore_tz: bool
When combining from different timezones, ignore that part of datetime.
Default is True
proxy: str
Optional. Proxy server URL scheme. Default is None
rounding: bool
@@ -105,7 +111,7 @@ def download(tickers, start=None, end=None, actions=False, threads=True,
_download_one_threaded(ticker, period=period, interval=interval,
start=start, end=end, prepost=prepost,
actions=actions, auto_adjust=auto_adjust,
back_adjust=back_adjust,
back_adjust=back_adjust, keepna=keepna,
progress=(progress and i > 0), proxy=proxy,
rounding=rounding, timeout=timeout)
while len(shared._DFS) < len(tickers):
@@ -117,7 +123,7 @@ def download(tickers, start=None, end=None, actions=False, threads=True,
data = _download_one(ticker, period=period, interval=interval,
start=start, end=end, prepost=prepost,
actions=actions, auto_adjust=auto_adjust,
back_adjust=back_adjust, proxy=proxy,
back_adjust=back_adjust, keepna=keepna, proxy=proxy,
rounding=rounding, timeout=timeout)
shared._DFS[ticker.upper()] = data
if progress:
@@ -133,16 +139,21 @@ def download(tickers, start=None, end=None, actions=False, threads=True,
print("\n".join(['- %s: %s' %
v for v in list(shared._ERRORS.items())]))
if ignore_tz:
for tkr in shared._DFS.keys():
if (shared._DFS[tkr] is not None) and (shared._DFS[tkr].shape[0]>0):
shared._DFS[tkr].index = shared._DFS[tkr].index.tz_localize(None)
if len(tickers) == 1:
ticker = tickers[0]
return shared._DFS[shared._ISINS.get(ticker, ticker)]
try:
data = _pd.concat(shared._DFS.values(), axis=1,
data = _pd.concat(shared._DFS.values(), axis=1, sort=True,
keys=shared._DFS.keys())
except Exception:
_realign_dfs()
data = _pd.concat(shared._DFS.values(), axis=1,
data = _pd.concat(shared._DFS.values(), axis=1, sort=True,
keys=shared._DFS.keys())
# switch names back to isins if applicable
@@ -183,11 +194,11 @@ def _download_one_threaded(ticker, start=None, end=None,
auto_adjust=False, back_adjust=False,
actions=False, progress=True, period="max",
interval="1d", prepost=False, proxy=None,
rounding=False, timeout=None):
keepna=False, rounding=False, timeout=None):
data = _download_one(ticker, start, end, auto_adjust, back_adjust,
actions, period, interval, prepost, proxy, rounding,
timeout)
keepna, timeout, many=True)
shared._DFS[ticker.upper()] = data
if progress:
shared._PROGRESS_BAR.animate()
@@ -197,11 +208,11 @@ def _download_one(ticker, start=None, end=None,
auto_adjust=False, back_adjust=False,
actions=False, period="max", interval="1d",
prepost=False, proxy=None, rounding=False,
timeout=None):
keepna=False, timeout=None, many=False):
return Ticker(ticker).history(period=period, interval=interval,
start=start, end=end, prepost=prepost,
actions=actions, auto_adjust=auto_adjust,
back_adjust=back_adjust, proxy=proxy,
rounding=rounding, many=True,
timeout=timeout)
rounding=rounding, keepna=keepna, timeout=timeout,
many=many)

View File

@@ -21,11 +21,30 @@
from __future__ import print_function
import datetime as _datetime
import pytz as _tz
import requests as _requests
import re as _re
import pandas as _pd
import numpy as _np
import sys as _sys
import os as _os
import appdirs as _ad
from base64 import b64decode
import hashlib
usePycryptodome = False # slightly faster
# usePycryptodome = True
if usePycryptodome:
# NOTE: if decide to use 'pycryptodome', set min version to 3.6.6
from Crypto.Cipher import AES
from Crypto.Util.Padding import unpad
else:
from cryptography.hazmat.primitives import padding
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from threading import Lock
mutex = Lock()
try:
import ujson as _json
@@ -102,24 +121,112 @@ def get_html(url, proxy=None, session=None):
return html
def decrypt_cryptojs_stores(data):
    """
    Yahoo has started encrypting data stores, this method decrypts it.

    The password is derived with PBKDF2-HMAC-SHA1 from data["_cs"] (as the
    password) and the packed words of data["_cr"] (as the salt). The stores
    blob is base64-decoded, split into an 8-byte "Salted__" header, an 8-byte
    salt and the ciphertext, and decrypted with AES-CBC followed by PKCS7
    unpadding.

    :param data: Python dict of the json data; must contain "_cs", "_cr" and
        data['context']['dispatcher']['stores']
    :return: The decrypted string data in data['context']['dispatcher']['stores']
    :raises ValueError: if the decoded stores blob does not start with b"Salted__"
    """
    _cs = data["_cs"]
    # Assumes _cr has format like: '{"words":[-449732894,601032952,157396918,2056341829],"sigBytes":16}';
    _cr = _json.loads(data["_cr"])
    # Pack every word as a signed big-endian 4-byte integer and concatenate them.
    _cr = b"".join(int.to_bytes(i, length=4, byteorder="big", signed=True) for i in _cr["words"])

    password = hashlib.pbkdf2_hmac("sha1", _cs.encode("utf8"), _cr, 1, dklen=32).hex()

    encrypted_stores = b64decode(data['context']['dispatcher']['stores'])
    # Explicit check rather than `assert`: assertions are removed under
    # `python -O`, which would let a malformed blob reach the cipher.
    if encrypted_stores[0:8] != b"Salted__":
        raise ValueError("Encrypted stores are missing the 'Salted__' header")
    salt = encrypted_stores[8:16]
    encrypted_stores = encrypted_stores[16:]

    key, iv = _EVPKDF(password, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="md5")

    if usePycryptodome:
        cipher = AES.new(key, AES.MODE_CBC, iv=iv)
        plaintext = cipher.decrypt(encrypted_stores)
        plaintext = unpad(plaintext, 16, style="pkcs7")
    else:
        cipher = Cipher(algorithms.AES(key), modes.CBC(iv))
        decryptor = cipher.decryptor()
        plaintext = decryptor.update(encrypted_stores) + decryptor.finalize()
        # AES block size is 128 bits, hence PKCS7(128)
        unpadder = padding.PKCS7(128).unpadder()
        plaintext = unpadder.update(plaintext) + unpadder.finalize()
    plaintext = plaintext.decode("utf-8")

    return plaintext
def _EVPKDF(password, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="md5") -> tuple:
    """Derive a key and IV with the OpenSSL EVP key derivation scheme.

    Digest blocks are produced one after another, each hashing the previous
    block (nothing for the first one) followed by the password and the salt,
    then re-hashing the result ``iterations - 1`` more times. Blocks are
    concatenated until ``keySize + ivSize`` bytes are available.

    Args:
        password (Union[str, bytes, bytearray]): Password to generate key from.
            A ``str`` is encoded as UTF-8 first.
        salt (Union[bytes, bytearray]): Salt to use.
        keySize (int, optional): Output key length in bytes. Defaults to 32.
        ivSize (int, optional): Output Initialization Vector (IV) length in bytes. Defaults to 16.
        iterations (int, optional): Number of iterations to perform. Defaults to 1.
        hashAlgorithm (str, optional): Hash algorithm to use for the KDF. Defaults to 'md5'.

    Returns:
        key, iv: Derived key and Initialization Vector (IV) bytes.

    Taken from: https://gist.github.com/rafiibrahim8/0cd0f8c46896cafef6486cb1a50a16d3
    OpenSSL original code: https://github.com/openssl/openssl/blob/master/crypto/evp/evp_key.c#L78
    """
    assert iterations > 0, "Iterations can not be less than 1."

    secret = password.encode("utf-8") if isinstance(password, str) else password
    wanted = keySize + ivSize

    chunks = []
    produced = 0
    previous = b""  # empty for the first round, so feeding it to the hash is a no-op
    while produced < wanted:
        hasher = hashlib.new(hashAlgorithm)
        hasher.update(previous)
        hasher.update(secret)
        hasher.update(salt)
        previous = hasher.digest()
        # Extra stretching rounds re-hash the digest on its own
        for _ in range(iterations - 1):
            previous = hashlib.new(hashAlgorithm, previous).digest()
        chunks.append(previous)
        produced += len(previous)

    material = b"".join(chunks)
    return material[:keySize], material[keySize:wanted]
def get_json(url, proxy=None, session=None):
session = session or _requests
html = session.get(url=url, proxies=proxy, headers=user_agent_headers).text
if "QuoteSummaryStore" not in html:
html = session.get(url=url, proxies=proxy).text
if "QuoteSummaryStore" not in html:
return {}
if not "root.App.main =" in html:
return {}
json_str = html.split('root.App.main =')[1].split(
'(this)')[0].split(';\n}')[0].strip()
data = _json.loads(json_str)[
'context']['dispatcher']['stores']['QuoteSummaryStore']
data = _json.loads(json_str)
if "_cs" in data and "_cr" in data:
data_stores = _json.loads(decrypt_cryptojs_stores(data))
else:
if "context" in data and "dispatcher" in data["context"]:
# Keep old code, just in case
data_stores = data['context']['dispatcher']['stores']
else:
data_stores = data
if not 'QuoteSummaryStore' in data_stores:
# Problem in data. Either delisted, or Yahoo spam triggered
return {}
data = data_stores['QuoteSummaryStore']
# add data about Shares Outstanding for companies' tickers if they are available
try:
data['annualBasicAverageShares'] = _json.loads(
json_str)['context']['dispatcher']['stores'][
'QuoteTimeSeriesStore']['timeSeries']['annualBasicAverageShares']
data['annualBasicAverageShares'] = \
data_stores['QuoteTimeSeriesStore']['timeSeries']['annualBasicAverageShares']
except Exception:
pass
@@ -135,6 +242,23 @@ def camel2title(o):
return [_re.sub("([a-z])([A-Z])", r"\g<1> \g<2>", i).title() for i in o]
def _parse_user_dt(dt, exchange_tz):
    """Convert a user-supplied date/time into an integer epoch timestamp.

    :param dt: an ``int`` (returned unchanged after a validity check), a
        'YYYY-MM-DD' string, a ``datetime.date`` or a ``datetime.datetime``
    :param exchange_tz: timezone name used to localize values that carry no
        tzinfo of their own
    :return: integer timestamp
    """
    if isinstance(dt, int):
        # Should already be epoch; the conversion only checks that it is usable
        _datetime.datetime.fromtimestamp(dt)
        return dt

    moment = dt
    if isinstance(moment, str):
        moment = _datetime.datetime.strptime(str(moment), '%Y-%m-%d')

    # A plain date (not a datetime) is promoted to midnight of that day
    is_plain_date = isinstance(moment, _datetime.date) and not isinstance(moment, _datetime.datetime)
    if is_plain_date:
        moment = _datetime.datetime.combine(moment, _datetime.time(0))

    if isinstance(moment, _datetime.datetime) and moment.tzinfo is None:
        # Assume user is referring to exchange's timezone
        moment = _tz.timezone(exchange_tz).localize(moment)

    return int(moment.timestamp())
def auto_adjust(data):
df = data.copy()
ratio = df["Close"] / df["Adj Close"]
@@ -176,7 +300,7 @@ def back_adjust(data):
return df[["Open", "High", "Low", "Close", "Volume"]]
def parse_quotes(data, tz=None):
def parse_quotes(data):
timestamps = data["timestamp"]
ohlc = data["indicators"]["quote"][0]
volumes = ohlc["volume"]
@@ -199,13 +323,10 @@ def parse_quotes(data, tz=None):
quotes.index = _pd.to_datetime(timestamps, unit="s")
quotes.sort_index(inplace=True)
if tz is not None:
quotes.index = quotes.index.tz_localize(tz)
return quotes
def parse_actions(data, tz=None):
def parse_actions(data):
dividends = _pd.DataFrame(
columns=["Dividends"], index=_pd.DatetimeIndex([]))
splits = _pd.DataFrame(
@@ -218,8 +339,6 @@ def parse_actions(data, tz=None):
dividends.set_index("date", inplace=True)
dividends.index = _pd.to_datetime(dividends.index, unit="s")
dividends.sort_index(inplace=True)
if tz is not None:
dividends.index = dividends.index.tz_localize(tz)
dividends.columns = ["Dividends"]
@@ -229,8 +348,6 @@ def parse_actions(data, tz=None):
splits.set_index("date", inplace=True)
splits.index = _pd.to_datetime(splits.index, unit="s")
splits.sort_index(inplace=True)
if tz is not None:
splits.index = splits.index.tz_localize(tz)
splits["Stock Splits"] = splits["numerator"] / \
splits["denominator"]
splits = splits["Stock Splits"]
@@ -238,6 +355,19 @@ def parse_actions(data, tz=None):
return dividends, splits
def fix_Yahoo_dst_issue(df, interval):
    """Shift daily/weekly rows stamped at 22:00 or 23:00 forward to midnight.

    :param df: DataFrame with a datetime index; its index is modified in place
    :param interval: interval string; only "1d", "1w" and "1wk" are adjusted
    :return: the same ``df`` object
    """
    if interval in ["1d", "1w", "1wk"]:
        # These intervals should start at time 00:00. But for some combinations of date and timezone,
        # Yahoo has time off by few hours (e.g. Brazil 23:00 around Jan-2022). Suspect DST problem.
        # The clue is (a) minutes=0 and (b) hour near 0.
        # Obviously Yahoo meant 00:00, so ensure this doesn't affect date conversion:
        f_pre_midnight = (df.index.minute == 0) & (df.index.hour.isin([22, 23]))
        dst_error_hours = _np.zeros(df.shape[0], dtype=int)
        # Hours missing until the next midnight (2 for 22:00, 1 for 23:00)
        dst_error_hours[f_pre_midnight] = 24 - df.index[f_pre_midnight].hour
        # to_timedelta() instead of TimedeltaIndex(data, unit): the constructor's
        # `unit` argument is deprecated in recent pandas releases.
        df.index += _pd.to_timedelta(dst_error_hours, unit='h')
    return df
class ProgressBar:
def __init__(self, iterations, text='completed'):
self.text = text
@@ -286,3 +416,55 @@ class ProgressBar:
def __str__(self):
return str(self.prog_bar)
# Simple file cache of ticker->timezone.
# Optional base directory override; None means "use the per-user cache dir".
_cache_dp = None


def get_cache_dirpath():
    """Return the directory that holds the cache files.

    The result is the "py-yfinance" sub-directory of ``_cache_dp`` when that
    is set, otherwise of ``_ad.user_cache_dir()``.
    """
    base_dir = _ad.user_cache_dir() if _cache_dp is None else _cache_dp
    return _os.path.join(base_dir, "py-yfinance")
def set_tz_cache_location(dp):
    """Set the base directory used for the ticker->timezone cache.

    :param dp: directory path stored in the module-level ``_cache_dp``
    """
    global _cache_dp
    _cache_dp = dp
def cache_lookup_tkr_tz(tkr):
    """Look up the cached timezone of a ticker.

    :param tkr: ticker symbol, matched against the "Ticker" index column of
        the "tkr-tz.csv" cache file
    :return: the value of the "Tz" column for ``tkr``, or None when the cache
        file does not exist or has no row for ``tkr``
    """
    fp = _os.path.join(get_cache_dirpath(), "tkr-tz.csv")
    if not _os.path.isfile(fp):
        return None

    # Hold the lock only while reading; `with` releases it even if read_csv
    # raises, so a bad cache file cannot leave the mutex locked forever.
    with mutex:
        df = _pd.read_csv(fp, index_col="Ticker", on_bad_lines="skip")

    if tkr in df.index:
        return df.loc[tkr, "Tz"]
    return None
def cache_store_tkr_tz(tkr, tz):
    """Store, or delete, the cached timezone of a ticker.

    :param tkr: ticker symbol used as the "Ticker" index value
    :param tz: timezone to store in the "Tz" column; None removes the
        ticker's row from the cache if it is present
    :raises Exception: if ``tz`` is not None and ``tkr`` is already cached
    """
    dp = get_cache_dirpath()
    fp = _os.path.join(dp, "tkr-tz.csv")

    # `with` guarantees the lock is released on every exit path, including
    # the "already in cache" exception and any file I/O error.
    with mutex:
        # exist_ok avoids the check-then-create race on the cache directory
        _os.makedirs(dp, exist_ok=True)

        if not _os.path.isfile(fp):
            if tz is None:
                # No cache file yet, so there is nothing to delete
                return
            df = _pd.DataFrame({"Tz": [tz]}, index=[tkr])
            df.index.name = "Ticker"
            df.to_csv(fp)
            return

        df = _pd.read_csv(fp, index_col="Ticker", on_bad_lines="skip")
        if tz is None:
            # Delete if in cache:
            if tkr in df.index:
                df.drop(tkr).to_csv(fp)
        else:
            if tkr in df.index:
                raise Exception("Tkr {} tz already in cache".format(tkr))
            df.loc[tkr, "Tz"] = tz
            df.to_csv(fp)

View File

@@ -1 +1 @@
version = "0.1.74"
version = "0.1.96"