Compare commits
108 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dfffa6a551 | ||
|
|
787b89c269 | ||
|
|
882f8b3367 | ||
|
|
338a94a8f3 | ||
|
|
e108a543fa | ||
|
|
071c3937b5 | ||
|
|
a279d06810 | ||
|
|
80db9dfe3c | ||
|
|
6bb23e05c2 | ||
|
|
edf2f69b62 | ||
|
|
ce4c2e457d | ||
|
|
4fc15251a0 | ||
|
|
c4600d6bd9 | ||
|
|
14a839582d | ||
|
|
3ae0434567 | ||
|
|
f24dab2f26 | ||
|
|
b47adf0a90 | ||
|
|
3537ec3e4b | ||
|
|
6a306b0353 | ||
|
|
6e3282badb | ||
|
|
829683ca02 | ||
|
|
3011cb324d | ||
|
|
366cfc0795 | ||
|
|
cbd4b924b8 | ||
|
|
56759e3f3c | ||
|
|
c193428b38 | ||
|
|
a625d9e9c5 | ||
|
|
36e80a73f7 | ||
|
|
cdae1cf226 | ||
|
|
bca569318e | ||
|
|
d11cd85a66 | ||
|
|
2d32a6e204 | ||
|
|
bad6456a44 | ||
|
|
1687ae66ab | ||
|
|
ddc34348d9 | ||
|
|
1d74cfeb19 | ||
|
|
1589d07b56 | ||
|
|
d261237320 | ||
|
|
66af3080dd | ||
|
|
9d396b9559 | ||
|
|
23b6ad12c1 | ||
|
|
22131e9fc7 | ||
|
|
e99e61f95a | ||
|
|
a3fe95ea27 | ||
|
|
000cb70bcb | ||
|
|
c8d9d06e75 | ||
|
|
a5e07a0375 | ||
|
|
a0a12bcf4c | ||
|
|
c49cf626bb | ||
|
|
fa6f3fc537 | ||
|
|
34dfe944d9 | ||
|
|
9619839bf5 | ||
|
|
90e00a71ca | ||
|
|
f525ee2f5e | ||
|
|
ef12c8b600 | ||
|
|
42e6d0894e | ||
|
|
de1c3c091b | ||
|
|
c6c0fa3347 | ||
|
|
75c823a72c | ||
|
|
f1ad8f0061 | ||
|
|
b27cc0cf40 | ||
|
|
1d7f8139d6 | ||
|
|
01ef1bb813 | ||
|
|
1db6be75b8 | ||
|
|
7902ec8667 | ||
|
|
ff42a3ac87 | ||
|
|
51f2c7301d | ||
|
|
632a16670a | ||
|
|
fea0dca6f4 | ||
|
|
c7e95152a0 | ||
|
|
a52e972d04 | ||
|
|
a197d9f78e | ||
|
|
dbb9bbfbf3 | ||
|
|
a7b053addd | ||
|
|
e8ca256c10 | ||
|
|
f651dd1e93 | ||
|
|
f40cf0aae1 | ||
|
|
200f57c458 | ||
|
|
e5d45eaa85 | ||
|
|
42b77a9b54 | ||
|
|
42e5751705 | ||
|
|
bca005a2c0 | ||
|
|
ca891bb187 | ||
|
|
0939ff3c78 | ||
|
|
6f5c5635be | ||
|
|
809622e426 | ||
|
|
eec1f3dbad | ||
|
|
1de789ad72 | ||
|
|
cd68ff68c6 | ||
|
|
9673970f45 | ||
|
|
6ea69a70ac | ||
|
|
c723a5ab44 | ||
|
|
50741d1409 | ||
|
|
69d0dcd62b | ||
|
|
5c9348f255 | ||
|
|
a472546e7b | ||
|
|
c914f1f183 | ||
|
|
92c82342fe | ||
|
|
7ae08b04f3 | ||
|
|
4b50f1e81c | ||
|
|
1ed58be749 | ||
|
|
375b4f9376 | ||
|
|
b6b4426ca9 | ||
|
|
149ebe46db | ||
|
|
d80b27cfde | ||
|
|
36e277317b | ||
|
|
0e1ea4d2c6 | ||
|
|
2d96c383ef |
5
.github/ISSUE_TEMPLATE/bug_report.md
vendored
5
.github/ISSUE_TEMPLATE/bug_report.md
vendored
@@ -14,4 +14,7 @@ Before posting an issue - please upgrade to the latest version and confirm the i
|
||||
Upgrade using:
|
||||
`$ pip install yfinance --upgrade --no-cache-dir`
|
||||
|
||||
Bug still there? Delete this content and submit your bug report here...
|
||||
Bug still there? Delete this content and submit your bug report here and provide the following, as best you can:
|
||||
|
||||
- Simple code that reproduces your problem
|
||||
- The error message
|
||||
|
||||
@@ -1,6 +1,88 @@
|
||||
Change Log
|
||||
===========
|
||||
|
||||
0.1.96
|
||||
------
|
||||
- Fix info[] not caching #1258
|
||||
|
||||
0.1.95
|
||||
------
|
||||
- Fix info[] bug #1257
|
||||
|
||||
0.1.94
|
||||
------
|
||||
- Fix delisted ticker info[]
|
||||
|
||||
0.1.93
|
||||
------
|
||||
- Fix Ticker.shares
|
||||
|
||||
0.1.92
|
||||
------
|
||||
- Decrypt new Yahoo encryption #1255
|
||||
|
||||
0.1.90
|
||||
------
|
||||
- Restore lxml req, increase min ver #1237
|
||||
|
||||
0.1.89
|
||||
------
|
||||
- Remove unused incompatible dependency #1222
|
||||
- Fix minimum Pandas version #1230
|
||||
|
||||
0.1.87
|
||||
------
|
||||
- Fix localizing midnight when non-existent (DST) #1176
|
||||
- Fix thread deadlock in bpython #1163
|
||||
|
||||
0.1.86
|
||||
------
|
||||
- Fix 'trailingPegRatio' #1141
|
||||
- Improve handling delisted tickers #1142
|
||||
- Fix corrupt tkr-tz-csv halting code #1162
|
||||
- Change default start to 1900-01-01 #1170
|
||||
|
||||
0.1.85
|
||||
------
|
||||
- Fix info['logo_url'] #1062
|
||||
- Fix handling delisted ticker #1137
|
||||
|
||||
0.1.84
|
||||
------
|
||||
- Make tz-cache thread-safe
|
||||
|
||||
0.1.83
|
||||
------
|
||||
- Reduce spam-effect of tz-fetch
|
||||
|
||||
0.1.81
|
||||
------
|
||||
- Fix unhandled tz-cache exception #1107
|
||||
|
||||
0.1.80
|
||||
------
|
||||
- Fix `download(ignore_tz=True)` for single ticker #1097
|
||||
- Fix rare case of error "Cannot infer DST time" #1100
|
||||
|
||||
0.1.79
|
||||
------
|
||||
- Fix when Yahoo returns price=NaNs on dividend day
|
||||
|
||||
0.1.78
|
||||
------
|
||||
- Fix download() when different timezones #1085
|
||||
|
||||
0.1.77
|
||||
------
|
||||
- Fix user experience bug #1078
|
||||
|
||||
0.1.75
|
||||
------
|
||||
- Fixed datetime-related issues: #1048
|
||||
- Add 'keepna' argument #1032
|
||||
- Speedup Ticker() creation #1042
|
||||
- Improve a bugfix #1033
|
||||
|
||||
0.1.74
|
||||
------
|
||||
- Fixed bug introduced in 0.1.73 (sorry :/)
|
||||
|
||||
30
README.md
30
README.md
@@ -48,8 +48,6 @@ Yahoo! finance API is intended for personal use only.**
|
||||
|
||||
The `Ticker` module, which allows you to access ticker data in a more Pythonic way:
|
||||
|
||||
Note: yahoo finance datetimes are received as UTC.
|
||||
|
||||
```python
|
||||
import yfinance as yf
|
||||
|
||||
@@ -187,6 +185,11 @@ data = yf.download( # or pdr.get_data_yahoo(...
|
||||
# (optional, default is '1d')
|
||||
interval = "1m",
|
||||
|
||||
# Whether to ignore timezone when aligning ticker data from
|
||||
# different timezones. Default is True. False may be useful for
|
||||
# minute/hourly data.
|
||||
ignore_tz = False,
|
||||
|
||||
# group by ticker (to access via data['SPY'])
|
||||
# (optional, default is 'column')
|
||||
group_by = 'ticker',
|
||||
@@ -209,6 +212,18 @@ data = yf.download( # or pdr.get_data_yahoo(...
|
||||
)
|
||||
```
|
||||
|
||||
### Timezone cache store
|
||||
|
||||
When fetching price data, all dates are localized to stock exchange timezone.
|
||||
But timezone retrieval is relatively slow, so yfinance attempts to cache them
|
||||
in your user cache folder.
|
||||
You can direct cache to use a different location with `set_tz_cache_location()`:
|
||||
```python
|
||||
import yfinance as yf
|
||||
yf.set_tz_cache_location("custom/cache/location")
|
||||
...
|
||||
```
|
||||
|
||||
### Managing Multi-Level Columns
|
||||
|
||||
The following answer on Stack Overflow is for [How to deal with
|
||||
@@ -259,11 +274,12 @@ To install `yfinance` using `conda`, see
|
||||
### Requirements
|
||||
|
||||
- [Python](https://www.python.org) \>= 2.7, 3.4+
|
||||
- [Pandas](https://github.com/pydata/pandas) (tested to work with
|
||||
\>=0.23.1)
|
||||
- [Numpy](http://www.numpy.org) \>= 1.11.1
|
||||
- [requests](http://docs.python-requests.org/en/master/) \>= 2.14.2
|
||||
- [lxml](https://pypi.org/project/lxml/) \>= 4.5.1
|
||||
- [Pandas](https://github.com/pydata/pandas) \>= 1.3.0
|
||||
- [Numpy](http://www.numpy.org) \>= 1.16.5
|
||||
- [requests](http://docs.python-requests.org/en/master/) \>= 2.26
|
||||
- [lxml](https://pypi.org/project/lxml/) \>= 4.9.1
|
||||
- [appdirs](https://pypi.org/project/appdirs) \>= 1.4.4
|
||||
- [cryptography](https://pypi.org/project/cryptography) \>=3.3.2
|
||||
|
||||
### Optional (if you want to use `pandas_datareader`)
|
||||
|
||||
|
||||
18
meta.yaml
18
meta.yaml
@@ -1,5 +1,5 @@
|
||||
{% set name = "yfinance" %}
|
||||
{% set version = "0.1.58" %}
|
||||
{% set version = "0.1.96" %}
|
||||
|
||||
package:
|
||||
name: "{{ name|lower }}"
|
||||
@@ -16,20 +16,24 @@ build:
|
||||
|
||||
requirements:
|
||||
host:
|
||||
- pandas >=0.24.0
|
||||
- pandas >=1.3.0
|
||||
- numpy >=1.16.5
|
||||
- requests >=2.21
|
||||
- requests >=2.26
|
||||
- multitasking >=0.0.7
|
||||
- lxml >=4.5.1
|
||||
- lxml >=4.9.1
|
||||
- appdirs >= 1.4.4
|
||||
- cryptography >= 3.3.2
|
||||
- pip
|
||||
- python
|
||||
|
||||
run:
|
||||
- pandas >=0.24.0
|
||||
- pandas >=1.3.0
|
||||
- numpy >=1.16.5
|
||||
- requests >=2.21
|
||||
- requests >=2.26
|
||||
- multitasking >=0.0.7
|
||||
- lxml >=4.5.1
|
||||
- lxml >=4.9.1
|
||||
- appdirs >= 1.4.4
|
||||
- cryptography >= 3.3.2
|
||||
- python
|
||||
|
||||
test:
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
pandas>=0.24.0
|
||||
pandas>=1.3.0
|
||||
numpy>=1.16.5
|
||||
requests>=2.26
|
||||
multitasking>=0.0.7
|
||||
lxml>=4.5.1
|
||||
lxml>=4.9.1
|
||||
appdirs>=1.4.4
|
||||
cryptography>=3.3.2
|
||||
|
||||
5
setup.py
5
setup.py
@@ -61,9 +61,10 @@ setup(
|
||||
platforms=['any'],
|
||||
keywords='pandas, yahoo finance, pandas datareader',
|
||||
packages=find_packages(exclude=['contrib', 'docs', 'tests', 'examples']),
|
||||
install_requires=['pandas>=0.24.0', 'numpy>=1.15',
|
||||
install_requires=['pandas>=1.3.0', 'numpy>=1.16.5',
|
||||
'requests>=2.26', 'multitasking>=0.0.7',
|
||||
'lxml>=4.5.1'],
|
||||
'lxml>=4.9.1', 'appdirs>=1.4.4',
|
||||
'cryptography>=3.3.2'],
|
||||
entry_points={
|
||||
'console_scripts': [
|
||||
'sample=sample:main',
|
||||
|
||||
@@ -15,21 +15,90 @@ Sanity check for most common library uses all working
|
||||
|
||||
import yfinance as yf
|
||||
import unittest
|
||||
import datetime
|
||||
|
||||
symbols = ['MSFT', 'IWO', 'VFINX', '^GSPC', 'BTC-USD']
|
||||
tickers = [yf.Ticker(symbol) for symbol in symbols]
|
||||
session = None
|
||||
import requests_cache ; session = requests_cache.CachedSession("yfinance.cache", expire_after=24*60*60)
|
||||
|
||||
# Good symbols = all attributes should work
|
||||
good_symbols = ['MSFT', 'IWO', 'VFINX', '^GSPC', 'BTC-USD']
|
||||
good_tickers = [yf.Ticker(symbol, session=session) for symbol in good_symbols]
|
||||
# Dodgy symbols = Yahoo data incomplete, so exclude from some tests
|
||||
dodgy_symbols = ["G7W.DU"]
|
||||
dodgy_tickers = [yf.Ticker(symbol, session=session) for symbol in dodgy_symbols]
|
||||
symbols = good_symbols + dodgy_symbols
|
||||
tickers = good_tickers + dodgy_tickers
|
||||
# Delisted = no data expected but yfinance shouldn't raise exception
|
||||
delisted_symbols = ["BRK.B", "SDLP"]
|
||||
delisted_tickers = [yf.Ticker(symbol, session=session) for symbol in delisted_symbols]
|
||||
|
||||
|
||||
class TestTicker(unittest.TestCase):
|
||||
def setUp(self):
|
||||
d_today = datetime.date.today()
|
||||
d_today -= datetime.timedelta(days=30)
|
||||
self.start_d = datetime.date(d_today.year, d_today.month, 1)
|
||||
|
||||
def test_info_history(self):
|
||||
# always should have info and history for valid symbols
|
||||
for ticker in tickers:
|
||||
# always should have info and history for valid symbols
|
||||
assert(ticker.info is not None and ticker.info != {})
|
||||
history = ticker.history(period="max")
|
||||
history = ticker.history(period="1mo")
|
||||
assert(history.empty is False and history is not None)
|
||||
histories = yf.download(symbols, period="1mo", session=session)
|
||||
assert(histories.empty is False and histories is not None)
|
||||
|
||||
for ticker in tickers:
|
||||
assert(ticker.info is not None and ticker.info != {})
|
||||
history = ticker.history(start=self.start_d)
|
||||
assert(history.empty is False and history is not None)
|
||||
histories = yf.download(symbols, start=self.start_d, session=session)
|
||||
assert(histories.empty is False and histories is not None)
|
||||
|
||||
def test_info_history_nofail(self):
|
||||
# should not throw Exception for delisted tickers, just print a message
|
||||
for ticker in delisted_tickers:
|
||||
history = ticker.history(period="1mo")
|
||||
histories = yf.download(delisted_symbols, period="1mo", session=session)
|
||||
histories = yf.download(delisted_symbols[0], period="1mo", session=session)
|
||||
histories = yf.download(delisted_symbols[1], period="1mo")#, session=session)
|
||||
for ticker in delisted_tickers:
|
||||
history = ticker.history(start=self.start_d)
|
||||
histories = yf.download(delisted_symbols, start=self.start_d, session=session)
|
||||
histories = yf.download(delisted_symbols[0], start=self.start_d, session=session)
|
||||
histories = yf.download(delisted_symbols[1], start=self.start_d, session=session)
|
||||
|
||||
def test_attributes(self):
|
||||
for ticker in tickers:
|
||||
ticker.isin
|
||||
ticker.major_holders
|
||||
ticker.institutional_holders
|
||||
ticker.mutualfund_holders
|
||||
ticker.dividends
|
||||
ticker.splits
|
||||
ticker.actions
|
||||
ticker.info
|
||||
ticker.info["trailingPegRatio"]
|
||||
ticker.calendar
|
||||
ticker.recommendations
|
||||
ticker.earnings
|
||||
ticker.quarterly_earnings
|
||||
ticker.financials
|
||||
ticker.quarterly_financials
|
||||
ticker.balance_sheet
|
||||
ticker.quarterly_balance_sheet
|
||||
ticker.cashflow
|
||||
ticker.quarterly_cashflow
|
||||
ticker.sustainability
|
||||
ticker.options
|
||||
ticker.news
|
||||
ticker.shares
|
||||
ticker.earnings_history
|
||||
ticker.earnings_dates
|
||||
|
||||
def test_attributes_nofail(self):
|
||||
# should not throw Exception for delisted tickers, just print a message
|
||||
for ticker in delisted_tickers:
|
||||
ticker.isin
|
||||
ticker.major_holders
|
||||
ticker.institutional_holders
|
||||
@@ -56,8 +125,7 @@ class TestTicker(unittest.TestCase):
|
||||
ticker.earnings_dates
|
||||
|
||||
def test_holders(self):
|
||||
for ticker in tickers:
|
||||
assert(ticker.info is not None and ticker.info != {})
|
||||
for ticker in good_tickers:
|
||||
assert(ticker.major_holders is not None)
|
||||
assert(ticker.institutional_holders is not None)
|
||||
|
||||
|
||||
@@ -23,6 +23,7 @@ from . import version
|
||||
from .ticker import Ticker
|
||||
from .tickers import Tickers
|
||||
from .multi import download
|
||||
from .utils import set_tz_cache_location
|
||||
|
||||
__version__ = version.version
|
||||
__author__ = "Ran Aroussi"
|
||||
@@ -42,4 +43,4 @@ def pdr_override():
|
||||
pass
|
||||
|
||||
|
||||
__all__ = ['download', 'Ticker', 'Tickers', 'pdr_override']
|
||||
__all__ = ['download', 'Ticker', 'Tickers', 'pdr_override', 'set_tz_cache_location']
|
||||
|
||||
483
yfinance/base.py
483
yfinance/base.py
@@ -23,6 +23,7 @@ from __future__ import print_function
|
||||
|
||||
import time as _time
|
||||
import datetime as _datetime
|
||||
import pytz as _tz
|
||||
import requests as _requests
|
||||
import pandas as _pd
|
||||
import numpy as _np
|
||||
@@ -53,6 +54,7 @@ class TickerBase():
|
||||
self._history = None
|
||||
self._base_url = _BASE_URL_
|
||||
self._scrape_url = _SCRAPE_URL_
|
||||
self._tz = None
|
||||
|
||||
self._fundamentals = False
|
||||
self._info = None
|
||||
@@ -71,18 +73,10 @@ class TickerBase():
|
||||
self._earnings_dates = None
|
||||
self._earnings_history = None
|
||||
|
||||
self._earnings = {
|
||||
"yearly": utils.empty_df(),
|
||||
"quarterly": utils.empty_df()}
|
||||
self._financials = {
|
||||
"yearly": utils.empty_df(),
|
||||
"quarterly": utils.empty_df()}
|
||||
self._balancesheet = {
|
||||
"yearly": utils.empty_df(),
|
||||
"quarterly": utils.empty_df()}
|
||||
self._cashflow = {
|
||||
"yearly": utils.empty_df(),
|
||||
"quarterly": utils.empty_df()}
|
||||
self._earnings = None
|
||||
self._financials = None
|
||||
self._balancesheet = None
|
||||
self._cashflow = None
|
||||
|
||||
# accept isin as ticker
|
||||
if utils.is_isin(self.ticker):
|
||||
@@ -106,8 +100,8 @@ class TickerBase():
|
||||
|
||||
def history(self, period="1mo", interval="1d",
|
||||
start=None, end=None, prepost=False, actions=True,
|
||||
auto_adjust=True, back_adjust=False,
|
||||
proxy=None, rounding=False, tz=None, timeout=None, **kwargs):
|
||||
auto_adjust=True, back_adjust=False, keepna=False,
|
||||
proxy=None, rounding=False, timeout=None, **kwargs):
|
||||
"""
|
||||
:Parameters:
|
||||
period : str
|
||||
@@ -129,14 +123,14 @@ class TickerBase():
|
||||
Adjust all OHLC automatically? Default is True
|
||||
back_adjust: bool
|
||||
Back-adjusted data to mimic true historical prices
|
||||
keepna: bool
|
||||
Keep NaN rows returned by Yahoo?
|
||||
Default is False
|
||||
proxy: str
|
||||
Optional. Proxy server URL scheme. Default is None
|
||||
rounding: bool
|
||||
Round values to 2 decimal places?
|
||||
Optional. Default is False = precision suggested by Yahoo!
|
||||
tz: str
|
||||
Optional timezone locale for dates.
|
||||
(default data is returned as non-localized dates)
|
||||
timeout: None or float
|
||||
If not None stops waiting for a response after given number of
|
||||
seconds. (Can also be a fraction of a second e.g. 0.01)
|
||||
@@ -147,23 +141,39 @@ class TickerBase():
|
||||
error message printing to console.
|
||||
"""
|
||||
|
||||
# Work with errors
|
||||
debug_mode = True
|
||||
if "debug" in kwargs and isinstance(kwargs["debug"], bool):
|
||||
debug_mode = kwargs["debug"]
|
||||
if "many" in kwargs and kwargs["many"]:
|
||||
# Disable prints with threads, it deadlocks/throws
|
||||
debug_mode = False
|
||||
|
||||
err_msg = "No data found for this date range, symbol may be delisted"
|
||||
|
||||
if start or period is None or period.lower() == "max":
|
||||
# Check can get TZ. Fail => probably delisted
|
||||
tz = self._get_ticker_tz(debug_mode, proxy, timeout)
|
||||
if tz is None:
|
||||
# Every valid ticker has a timezone. Missing = problem
|
||||
shared._DFS[self.ticker] = utils.empty_df()
|
||||
shared._ERRORS[self.ticker] = err_msg
|
||||
if debug_mode:
|
||||
print('- %s: %s' % (self.ticker, err_msg))
|
||||
return utils.empty_df()
|
||||
|
||||
if end is None:
|
||||
end = int(_time.time())
|
||||
elif isinstance(end, _datetime.datetime):
|
||||
end = int(_time.mktime(end.timetuple()))
|
||||
else:
|
||||
end = int(_time.mktime(_time.strptime(str(end), '%Y-%m-%d')))
|
||||
end = utils._parse_user_dt(end, tz)
|
||||
if start is None:
|
||||
if interval == "1m":
|
||||
start = end - 604800 # Subtract 7 days
|
||||
else:
|
||||
start = -631159200
|
||||
elif isinstance(start, _datetime.datetime):
|
||||
start = int(_time.mktime(start.timetuple()))
|
||||
#time stamp of 01/01/1900
|
||||
start = -2208994789
|
||||
else:
|
||||
start = int(_time.mktime(
|
||||
_time.strptime(str(start), '%Y-%m-%d')))
|
||||
start = utils._parse_user_dt(start, tz)
|
||||
params = {"period1": start, "period2": end}
|
||||
else:
|
||||
period = period.lower()
|
||||
@@ -206,17 +216,10 @@ class TickerBase():
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Work with errors
|
||||
debug_mode = True
|
||||
if "debug" in kwargs and isinstance(kwargs["debug"], bool):
|
||||
debug_mode = kwargs["debug"]
|
||||
|
||||
err_msg = "No data found for this date range, symbol may be delisted"
|
||||
|
||||
if data is None or not type(data) is dict or 'status_code' in data.keys():
|
||||
shared._DFS[self.ticker] = utils.empty_df()
|
||||
shared._ERRORS[self.ticker] = err_msg
|
||||
if "many" not in kwargs and debug_mode:
|
||||
if debug_mode:
|
||||
print('- %s: %s' % (self.ticker, err_msg))
|
||||
return utils.empty_df()
|
||||
|
||||
@@ -224,7 +227,7 @@ class TickerBase():
|
||||
err_msg = data["chart"]["error"]["description"]
|
||||
shared._DFS[self.ticker] = utils.empty_df()
|
||||
shared._ERRORS[self.ticker] = err_msg
|
||||
if "many" not in kwargs and debug_mode:
|
||||
if debug_mode:
|
||||
print('- %s: %s' % (self.ticker, err_msg))
|
||||
return shared._DFS[self.ticker]
|
||||
|
||||
@@ -232,17 +235,22 @@ class TickerBase():
|
||||
not data["chart"]["result"]:
|
||||
shared._DFS[self.ticker] = utils.empty_df()
|
||||
shared._ERRORS[self.ticker] = err_msg
|
||||
if "many" not in kwargs and debug_mode:
|
||||
if debug_mode:
|
||||
print('- %s: %s' % (self.ticker, err_msg))
|
||||
return shared._DFS[self.ticker]
|
||||
|
||||
# parse quotes
|
||||
try:
|
||||
quotes = utils.parse_quotes(data["chart"]["result"][0], tz)
|
||||
quotes = utils.parse_quotes(data["chart"]["result"][0])
|
||||
# Yahoo bug fix - it often appends latest price even if after end date
|
||||
if end and not quotes.empty:
|
||||
endDt = _pd.to_datetime(_datetime.datetime.utcfromtimestamp(end))
|
||||
if quotes.index[quotes.shape[0]-1] >= endDt:
|
||||
quotes = quotes.iloc[0:quotes.shape[0]-1]
|
||||
except Exception:
|
||||
shared._DFS[self.ticker] = utils.empty_df()
|
||||
shared._ERRORS[self.ticker] = err_msg
|
||||
if "many" not in kwargs and debug_mode:
|
||||
if debug_mode:
|
||||
print('- %s: %s' % (self.ticker, err_msg))
|
||||
return shared._DFS[self.ticker]
|
||||
|
||||
@@ -278,7 +286,7 @@ class TickerBase():
|
||||
err_msg = "back_adjust failed with %s" % e
|
||||
shared._DFS[self.ticker] = utils.empty_df()
|
||||
shared._ERRORS[self.ticker] = err_msg
|
||||
if "many" not in kwargs and debug_mode:
|
||||
if debug_mode:
|
||||
print('- %s: %s' % (self.ticker, err_msg))
|
||||
|
||||
if rounding:
|
||||
@@ -286,16 +294,10 @@ class TickerBase():
|
||||
"chart"]["result"][0]["meta"]["priceHint"])
|
||||
quotes['Volume'] = quotes['Volume'].fillna(0).astype(_np.int64)
|
||||
|
||||
quotes.dropna(inplace=True)
|
||||
|
||||
# actions
|
||||
dividends, splits = utils.parse_actions(data["chart"]["result"][0], tz)
|
||||
dividends, splits = utils.parse_actions(data["chart"]["result"][0])
|
||||
|
||||
# Yahoo bug fix - it often appends latest price even if after end date
|
||||
if end and not quotes.empty:
|
||||
endDt = _pd.to_datetime(_datetime.datetime.fromtimestamp(end))
|
||||
if quotes.index[quotes.shape[0]-1] > endDt:
|
||||
quotes = quotes.iloc[0:quotes.shape[0]-1]
|
||||
tz_exchange = data["chart"]["result"][0]["meta"]["exchangeTimezoneName"]
|
||||
|
||||
# combine
|
||||
df = _pd.concat([quotes, dividends, splits], axis=1, sort=True)
|
||||
@@ -303,32 +305,245 @@ class TickerBase():
|
||||
df["Stock Splits"].fillna(0, inplace=True)
|
||||
|
||||
# index eod/intraday
|
||||
df.index = df.index.tz_localize("UTC").tz_convert(
|
||||
data["chart"]["result"][0]["meta"]["exchangeTimezoneName"])
|
||||
df.index = df.index.tz_localize("UTC").tz_convert(tz_exchange)
|
||||
|
||||
df = utils.fix_Yahoo_dst_issue(df, params["interval"])
|
||||
|
||||
if params["interval"][-1] == "m":
|
||||
df.index.name = "Datetime"
|
||||
elif params["interval"] == "1h":
|
||||
pass
|
||||
else:
|
||||
df.index = _pd.to_datetime(df.index.date)
|
||||
if tz is not None:
|
||||
df.index = df.index.tz_localize(tz)
|
||||
# If a midnight is during DST transition hour when clocks roll back,
|
||||
# meaning clock hits midnight twice, then use the 2nd (ambiguous=True)
|
||||
df.index = _pd.to_datetime(df.index.date).tz_localize(tz_exchange, ambiguous=True, nonexistent='shift_forward')
|
||||
df.index.name = "Date"
|
||||
|
||||
# duplicates and missing rows cleanup
|
||||
df.dropna(how='all', inplace=True)
|
||||
df = df[~df.index.duplicated(keep='first')]
|
||||
|
||||
self._history = df.copy()
|
||||
|
||||
if not actions:
|
||||
df.drop(columns=["Dividends", "Stock Splits"], inplace=True)
|
||||
df = df.drop(columns=["Dividends", "Stock Splits"])
|
||||
if not keepna:
|
||||
mask_nan_or_zero = (df.isna()|(df==0)).all(axis=1)
|
||||
df = df.drop(mask_nan_or_zero.index[mask_nan_or_zero])
|
||||
|
||||
return df
|
||||
|
||||
# ------------------------
|
||||
|
||||
def _get_ticker_tz(self, debug_mode, proxy, timeout):
    """Return the exchange timezone name for this ticker, or None.

    Lookup order:
      1. the value already memoised on this instance (``self._tz``),
      2. the persistent cache (``utils.cache_lookup_tkr_tz``),
      3. a live request via ``self._fetch_ticker_tz``.

    A cached entry is only trusted when it is a string that pytz accepts
    as a timezone name; anything else is cleared from the cache and
    re-fetched. A freshly fetched value is written back to the cache.

    :Parameters:
        debug_mode : bool
            Forwarded to ``_fetch_ticker_tz``; enables its console messages.
        proxy : str or dict or None
            Forwarded to ``_fetch_ticker_tz``.
        timeout : None or float
            Forwarded to ``_fetch_ticker_tz``.

    :Returns:
        The timezone name, or None when it could not be determined.
    """
    if self._tz is not None:
        return self._tz

    tkr_tz = utils.cache_lookup_tkr_tz(self.ticker)

    if tkr_tz is not None:
        # Previously this test was inverted (every str was "invalid") and
        # the pytz check referenced an undefined name, so the cache was
        # discarded on every call.
        valid = isinstance(tkr_tz, str)
        if valid:
            try:
                _tz.timezone(tkr_tz)
            except _tz.UnknownTimeZoneError:
                valid = False
        if not valid:
            # Clear from cache and force re-fetch
            try:
                utils.cache_store_tkr_tz(self.ticker, None)
            except PermissionError:
                # System probably read-only, so cannot update the cache
                pass
            tkr_tz = None

    if tkr_tz is None:
        tkr_tz = self._fetch_ticker_tz(debug_mode, proxy, timeout)

        if tkr_tz is not None:
            try:
                utils.cache_store_tkr_tz(self.ticker, tkr_tz)
            except PermissionError:
                # System probably read-only, so cannot cache
                pass

    self._tz = tkr_tz
    return tkr_tz
|
||||
|
||||
|
||||
def _fetch_ticker_tz(self, debug_mode, proxy, timeout):
    """Ask the chart endpoint for one day of data and return its timezone.

    Only the ``exchangeTimezoneName`` field of the response metadata is
    used. Any failure (request/JSON error, an error reported in the
    response, or a missing field) yields None; when ``debug_mode`` is
    true a message describing the failure is printed.

    :Parameters:
        debug_mode : bool
            Print diagnostic messages on failure?
        proxy : str or dict or None
            Proxy URL, or a dict with an "https" entry.
        timeout : None or float
            Passed through to the HTTP ``get`` call.
    """
    # Minimal query: the price data itself is irrelevant here.
    query = {"range": "1d", "interval": "1d"}

    # Normalise the proxy argument to requests' {"https": ...} form.
    if proxy is not None:
        if isinstance(proxy, dict) and "https" in proxy:
            proxy = proxy["https"]
        proxy = {"https": proxy}

    endpoint = "{}/v8/finance/chart/{}".format(self._base_url, self.ticker)
    http = self.session or _requests

    try:
        response = http.get(
            url=endpoint, params=query, proxies=proxy,
            headers=utils.user_agent_headers, timeout=timeout)
        payload = response.json()
    except Exception as exc:
        if debug_mode:
            print("Failed to get ticker '{}' reason: {}".format(self.ticker, exc))
        return None

    api_error = payload.get('chart', {}).get('error', None)
    if api_error:
        # The API answered, but with an explicit error object.
        if debug_mode:
            print("Got error from yahoo api for ticker {}, Error: {}".format(self.ticker, api_error))
        return None

    try:
        return payload["chart"]["result"][0]["meta"]["exchangeTimezoneName"]
    except Exception as exc:
        if debug_mode:
            print("Could not get exchangeTimezoneName for ticker '{}' reason: {}".format(self.ticker, exc))
            print("Got response: ")
            print("-------------")
            print(" {}".format(payload))
            print("-------------")
    return None
|
||||
|
||||
|
||||
def _get_info(self, proxy=None):
    """Scrape the ticker page and populate info-related attributes.

    Sets ``self._sustainability``, ``self._info`` and
    ``self._recommendations``; ``self._calendar`` is set only when the
    calendar data parses. Does nothing when info, sustainability and
    recommendations have all been populated already.

    Each section is parsed on a best-effort basis: a failure in one section
    leaves that section empty (or untouched) and does not stop the others.
    The final key-statistics request is not guarded, so errors from it
    propagate to the caller.

    :Parameters:
        proxy : str or dict or None
            Proxy URL, or a dict with an "https" entry. Default is None
    """
    # setup proxy in requests format
    if proxy is not None:
        if isinstance(proxy, dict) and "https" in proxy:
            proxy = proxy["https"]
        proxy = {"https": proxy}

    # Already fetched? Then there is nothing to do.
    if (self._info is not None
            and self._sustainability is not None
            and self._recommendations is not None):
        return

    ticker_url = "{}/{}".format(self._scrape_url, self.ticker)

    # get info and sustainability
    data = utils.get_json(ticker_url, proxy, self.session)

    # sustainability: keep only the scalar entries of 'esgScores'
    d = {}
    try:
        if isinstance(data.get('esgScores'), dict):
            for item in data['esgScores']:
                if not isinstance(data['esgScores'][item], (dict, list)):
                    d[item] = data['esgScores'][item]

            s = _pd.DataFrame(index=[0], data=d)[-1:].T
            s.columns = ['Value']
            # Index name encodes the rating period as "<year>-<month>"
            s.index.name = '%.f-%.f' % (
                s[s.index == 'ratingYear']['Value'].values[0],
                s[s.index == 'ratingMonth']['Value'].values[0])

            self._sustainability = s[~s.index.isin(
                ['maxAge', 'ratingYear', 'ratingMonth'])]
        else:
            self._sustainability = utils.empty_df()
    except Exception:
        self._sustainability = utils.empty_df()

    # info (be nice to python 2)
    self._info = {}
    try:
        items = ['summaryProfile', 'financialData', 'quoteType',
                 'defaultKeyStatistics', 'assetProfile', 'summaryDetail']
        for item in items:
            if isinstance(data.get(item), dict):
                self._info.update(data[item])
    except Exception:
        pass

    # For ETFs, provide this valuable data: the top holdings of the ETF
    try:
        if 'topHoldings' in data:
            self._info.update(data['topHoldings'])
    except Exception:
        pass

    try:
        if not isinstance(data.get('summaryDetail'), dict):
            # For some reason summaryDetail did not give any results.
            # The price dict usually has most of the same info
            self._info.update(data.get('price', {}))
    except Exception:
        pass

    try:
        # Prefer the 'price' section; fall back to regularMarketOpen
        self._info['regularMarketPrice'] = data.get('price', {}).get(
            'regularMarketPrice', self._info.get('regularMarketOpen', None))
    except Exception:
        pass

    try:
        self._info['preMarketPrice'] = data.get('price', {}).get(
            'preMarketPrice', self._info.get('preMarketPrice', None))
    except Exception:
        pass

    # logo_url: derived from the website domain when known, otherwise
    # guessed from the first word of the short name.
    self._info['logo_url'] = ""
    try:
        if 'website' not in self._info:
            self._info['logo_url'] = 'https://logo.clearbit.com/%s.com' % self._info['shortName'].split(' ')[0].split(',')[0]
        else:
            domain = self._info['website'].split(
                '://')[1].split('/')[0].replace('www.', '')
            self._info['logo_url'] = 'https://logo.clearbit.com/%s' % domain
    except Exception:
        pass

    # events
    try:
        cal = _pd.DataFrame(
            data['calendarEvents']['earnings'])
        cal['earningsDate'] = _pd.to_datetime(
            cal['earningsDate'], unit='s')
        self._calendar = cal.T
        self._calendar.index = utils.camel2title(self._calendar.index)
        self._calendar.columns = ['Value']
    except Exception:
        pass

    # analyst recommendations
    try:
        rec = _pd.DataFrame(
            data['upgradeDowngradeHistory']['history'])
        rec['earningsDate'] = _pd.to_datetime(
            rec['epochGradeDate'], unit='s')
        rec.set_index('earningsDate', inplace=True)
        rec.index.name = 'Date'
        rec.columns = utils.camel2title(rec.columns)
        self._recommendations = rec[[
            'Firm', 'To Grade', 'From Grade', 'Action']].sort_index()
    except Exception:
        self._recommendations = utils.empty_df()

    # Complementary key-statistics. For now just want 'trailing PEG ratio'
    session = self.session or _requests
    keys = {"trailingPegRatio"}
    if keys:
        # For just one/few variable is faster to query directly:
        url = "https://query1.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{}?symbol={}".format(self.ticker, self.ticker)
        for k in keys:
            url += "&type=" + k
        # Request 6 months of data
        now = _datetime.datetime.now()
        url += "&period1={}".format(int((now - _datetime.timedelta(days=365 // 2)).timestamp()))
        url += "&period2={}".format(int((now + _datetime.timedelta(days=1)).timestamp()))
        json_str = session.get(url=url, proxies=proxy, headers=utils.user_agent_headers).text
        json_data = _json.loads(json_str)
        key_stats = json_data["timeseries"]["result"][0]
        # Store every requested key. The previous code relied on the loop
        # variable left over from building the URL, so only one key would
        # ever be recorded.
        for k in keys:
            if k not in key_stats:
                # Yahoo website prints N/A, indicates Yahoo lacks
                # necessary data to calculate
                v = None
            else:
                # Select most recent (last) raw value in list:
                v = key_stats[k][-1]["reportedValue"]["raw"]
            self._info[k] = v
|
||||
|
||||
|
||||
def _get_fundamentals(self, proxy=None):
|
||||
def cleanup(data):
|
||||
df = _pd.DataFrame(data).drop(columns=['maxAge'])
|
||||
@@ -362,9 +577,6 @@ class TickerBase():
|
||||
|
||||
ticker_url = "{}/{}".format(self._scrape_url, self.ticker)
|
||||
|
||||
# get info and sustainability
|
||||
data = utils.get_json(ticker_url, proxy, self.session)
|
||||
|
||||
# holders
|
||||
try:
|
||||
resp = utils.get_html(ticker_url + '/holders', proxy, self.session)
|
||||
@@ -401,101 +613,16 @@ class TickerBase():
|
||||
self._mutualfund_holders['% Out'] = self._mutualfund_holders[
|
||||
'% Out'].str.replace('%', '').astype(float) / 100
|
||||
|
||||
# sustainability
|
||||
d = {}
|
||||
try:
|
||||
if isinstance(data.get('esgScores'), dict):
|
||||
for item in data['esgScores']:
|
||||
if not isinstance(data['esgScores'][item], (dict, list)):
|
||||
d[item] = data['esgScores'][item]
|
||||
|
||||
s = _pd.DataFrame(index=[0], data=d)[-1:].T
|
||||
s.columns = ['Value']
|
||||
s.index.name = '%.f-%.f' % (
|
||||
s[s.index == 'ratingYear']['Value'].values[0],
|
||||
s[s.index == 'ratingMonth']['Value'].values[0])
|
||||
|
||||
self._sustainability = s[~s.index.isin(
|
||||
['maxAge', 'ratingYear', 'ratingMonth'])]
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# info (be nice to python 2)
|
||||
self._info = {}
|
||||
try:
|
||||
items = ['summaryProfile', 'financialData', 'quoteType',
|
||||
'defaultKeyStatistics', 'assetProfile', 'summaryDetail']
|
||||
for item in items:
|
||||
if isinstance(data.get(item), dict):
|
||||
self._info.update(data[item])
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# For ETFs, provide this valuable data: the top holdings of the ETF
|
||||
try:
|
||||
if 'topHoldings' in data:
|
||||
self._info.update(data['topHoldings'])
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
if not isinstance(data.get('summaryDetail'), dict):
|
||||
# For some reason summaryDetail did not give any results. The price dict usually has most of the same info
|
||||
self._info.update(data.get('price', {}))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
# self._info['regularMarketPrice'] = self._info['regularMarketOpen']
|
||||
self._info['regularMarketPrice'] = data.get('price', {}).get(
|
||||
'regularMarketPrice', self._info.get('regularMarketOpen', None))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
self._info['preMarketPrice'] = data.get('price', {}).get(
|
||||
'preMarketPrice', self._info.get('preMarketPrice', None))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
self._info['logo_url'] = ""
|
||||
try:
|
||||
domain = self._info['website'].split(
|
||||
'://')[1].split('/')[0].replace('www.', '')
|
||||
self._info['logo_url'] = 'https://logo.clearbit.com/%s' % domain
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# events
|
||||
try:
|
||||
cal = _pd.DataFrame(
|
||||
data['calendarEvents']['earnings'])
|
||||
cal['earningsDate'] = _pd.to_datetime(
|
||||
cal['earningsDate'], unit='s')
|
||||
self._calendar = cal.T
|
||||
self._calendar.index = utils.camel2title(self._calendar.index)
|
||||
self._calendar.columns = ['Value']
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# analyst recommendations
|
||||
try:
|
||||
rec = _pd.DataFrame(
|
||||
data['upgradeDowngradeHistory']['history'])
|
||||
rec['earningsDate'] = _pd.to_datetime(
|
||||
rec['epochGradeDate'], unit='s')
|
||||
rec.set_index('earningsDate', inplace=True)
|
||||
rec.index.name = 'Date'
|
||||
rec.columns = utils.camel2title(rec.columns)
|
||||
self._recommendations = rec[[
|
||||
'Firm', 'To Grade', 'From Grade', 'Action']].sort_index()
|
||||
except Exception:
|
||||
pass
|
||||
self._get_info(proxy)
|
||||
|
||||
# get fundamentals
|
||||
data = utils.get_json(ticker_url + '/financials', proxy, self.session)
|
||||
|
||||
# generic patterns
|
||||
self._earnings = {"yearly": utils.empty_df(), "quarterly": utils.empty_df()}
|
||||
self._cashflow = {"yearly": utils.empty_df(), "quarterly": utils.empty_df()}
|
||||
self._balancesheet = {"yearly": utils.empty_df(), "quarterly": utils.empty_df()}
|
||||
self._financials = {"yearly": utils.empty_df(), "quarterly": utils.empty_df()}
|
||||
for key in (
|
||||
(self._cashflow, 'cashflowStatement', 'cashflowStatements'),
|
||||
(self._balancesheet, 'balanceSheet', 'balanceSheetStatements'),
|
||||
@@ -576,59 +703,17 @@ class TickerBase():
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Complementary key-statistics (currently fetching the important trailingPegRatio which is the value shown in the website)
|
||||
res = {}
|
||||
try:
|
||||
my_headers = {'user-agent': 'curl/7.55.1', 'accept': 'application/json', 'content-type': 'application/json',
|
||||
'referer': 'https://finance.yahoo.com/', 'cache-control': 'no-cache', 'connection': 'close'}
|
||||
p = _re.compile(r'root\.App\.main = (.*);')
|
||||
r = _requests.session().get('https://finance.yahoo.com/quote/{}/key-statistics?p={}'.format(self.ticker,
|
||||
self.ticker), headers=my_headers)
|
||||
q_results = {}
|
||||
my_qs_keys = ['pegRatio'] # QuoteSummaryStore
|
||||
# , 'quarterlyPegRatio'] # QuoteTimeSeriesStore
|
||||
my_ts_keys = ['trailingPegRatio']
|
||||
|
||||
# Complementary key-statistics
|
||||
data = _json.loads(p.findall(r.text)[0])
|
||||
key_stats = data['context']['dispatcher']['stores']['QuoteTimeSeriesStore']
|
||||
q_results.setdefault(self.ticker, [])
|
||||
for i in my_ts_keys:
|
||||
# j=0
|
||||
try:
|
||||
# res = {i: key_stats['timeSeries'][i][1]['reportedValue']['raw']}
|
||||
# We need to loop over multiple items, if they exist: 0,1,2,..
|
||||
zzz = key_stats['timeSeries'][i]
|
||||
for j in range(len(zzz)):
|
||||
if key_stats['timeSeries'][i][j]:
|
||||
res = {i: key_stats['timeSeries']
|
||||
[i][j]['reportedValue']['raw']}
|
||||
q_results[self.ticker].append(res)
|
||||
|
||||
# print(res)
|
||||
# q_results[ticker].append(res)
|
||||
except:
|
||||
q_results[ticker].append({i: np.nan})
|
||||
|
||||
res = {'Company': ticker}
|
||||
q_results[ticker].append(res)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if 'trailingPegRatio' in res:
|
||||
self._info['trailingPegRatio'] = res['trailingPegRatio']
|
||||
|
||||
self._fundamentals = True
|
||||
|
||||
def get_recommendations(self, proxy=None, as_dict=False, *args, **kwargs):
|
||||
self._get_fundamentals(proxy=proxy)
|
||||
self._get_info(proxy)
|
||||
data = self._recommendations
|
||||
if as_dict:
|
||||
return data.to_dict()
|
||||
return data
|
||||
|
||||
def get_calendar(self, proxy=None, as_dict=False, *args, **kwargs):
|
||||
self._get_fundamentals(proxy=proxy)
|
||||
self._get_info(proxy)
|
||||
data = self._calendar
|
||||
if as_dict:
|
||||
return data.to_dict()
|
||||
@@ -658,14 +743,14 @@ class TickerBase():
|
||||
return data
|
||||
|
||||
def get_info(self, proxy=None, as_dict=False, *args, **kwargs):
|
||||
self._get_fundamentals(proxy=proxy)
|
||||
self._get_info(proxy)
|
||||
data = self._info
|
||||
if as_dict:
|
||||
return data.to_dict()
|
||||
return data
|
||||
|
||||
def get_sustainability(self, proxy=None, as_dict=False, *args, **kwargs):
|
||||
self._get_fundamentals(proxy=proxy)
|
||||
self._get_info(proxy)
|
||||
data = self._sustainability
|
||||
if as_dict:
|
||||
return data.to_dict()
|
||||
@@ -763,6 +848,10 @@ class TickerBase():
|
||||
self.get_info(proxy=proxy)
|
||||
if "shortName" in self._info:
|
||||
q = self._info['shortName']
|
||||
if q is None:
|
||||
err_msg = "Cannot map to ISIN code, symbol may be delisted"
|
||||
print('- %s: %s' % (self.ticker, err_msg))
|
||||
return None
|
||||
|
||||
url = 'https://markets.businessinsider.com/ajax/' \
|
||||
'SearchController_Suggest?max_results=25&query=%s' \
|
||||
@@ -861,8 +950,10 @@ class TickerBase():
|
||||
dates = _pd.concat([dates, data], axis=0)
|
||||
page_offset += page_size
|
||||
|
||||
if dates is None:
|
||||
raise Exception("No data found, symbol may be delisted")
|
||||
if (dates is None) or dates.shape[0]==0:
|
||||
err_msg = "No earnings dates found, symbol may be delisted"
|
||||
print('- %s: %s' % (self.ticker, err_msg))
|
||||
return None
|
||||
dates = dates.reset_index(drop=True)
|
||||
|
||||
# Drop redundant columns
|
||||
|
||||
@@ -29,8 +29,8 @@ from . import Ticker, utils
|
||||
from . import shared
|
||||
|
||||
|
||||
def download(tickers, start=None, end=None, actions=False, threads=True,
|
||||
group_by='column', auto_adjust=False, back_adjust=False,
|
||||
def download(tickers, start=None, end=None, actions=False, threads=True, ignore_tz=True,
|
||||
group_by='column', auto_adjust=False, back_adjust=False, keepna=False,
|
||||
progress=True, period="max", show_errors=True, interval="1d", prepost=False,
|
||||
proxy=None, rounding=False, timeout=None, **kwargs):
|
||||
"""Download yahoo tickers
|
||||
@@ -56,10 +56,16 @@ def download(tickers, start=None, end=None, actions=False, threads=True,
|
||||
Default is False
|
||||
auto_adjust: bool
|
||||
Adjust all OHLC automatically? Default is False
|
||||
keepna: bool
|
||||
Keep NaN rows returned by Yahoo?
|
||||
Default is False
|
||||
actions: bool
|
||||
Download dividend + stock splits data. Default is False
|
||||
threads: bool / int
|
||||
How many threads to use for mass downloading. Default is True
|
||||
ignore_tz: bool
|
||||
When combining from different timezones, ignore that part of datetime.
|
||||
Default is True
|
||||
proxy: str
|
||||
Optional. Proxy server URL scheme. Default is None
|
||||
rounding: bool
|
||||
@@ -105,7 +111,7 @@ def download(tickers, start=None, end=None, actions=False, threads=True,
|
||||
_download_one_threaded(ticker, period=period, interval=interval,
|
||||
start=start, end=end, prepost=prepost,
|
||||
actions=actions, auto_adjust=auto_adjust,
|
||||
back_adjust=back_adjust,
|
||||
back_adjust=back_adjust, keepna=keepna,
|
||||
progress=(progress and i > 0), proxy=proxy,
|
||||
rounding=rounding, timeout=timeout)
|
||||
while len(shared._DFS) < len(tickers):
|
||||
@@ -117,7 +123,7 @@ def download(tickers, start=None, end=None, actions=False, threads=True,
|
||||
data = _download_one(ticker, period=period, interval=interval,
|
||||
start=start, end=end, prepost=prepost,
|
||||
actions=actions, auto_adjust=auto_adjust,
|
||||
back_adjust=back_adjust, proxy=proxy,
|
||||
back_adjust=back_adjust, keepna=keepna, proxy=proxy,
|
||||
rounding=rounding, timeout=timeout)
|
||||
shared._DFS[ticker.upper()] = data
|
||||
if progress:
|
||||
@@ -133,16 +139,21 @@ def download(tickers, start=None, end=None, actions=False, threads=True,
|
||||
print("\n".join(['- %s: %s' %
|
||||
v for v in list(shared._ERRORS.items())]))
|
||||
|
||||
if ignore_tz:
|
||||
for tkr in shared._DFS.keys():
|
||||
if (shared._DFS[tkr] is not None) and (shared._DFS[tkr].shape[0]>0):
|
||||
shared._DFS[tkr].index = shared._DFS[tkr].index.tz_localize(None)
|
||||
|
||||
if len(tickers) == 1:
|
||||
ticker = tickers[0]
|
||||
return shared._DFS[shared._ISINS.get(ticker, ticker)]
|
||||
|
||||
try:
|
||||
data = _pd.concat(shared._DFS.values(), axis=1,
|
||||
data = _pd.concat(shared._DFS.values(), axis=1, sort=True,
|
||||
keys=shared._DFS.keys())
|
||||
except Exception:
|
||||
_realign_dfs()
|
||||
data = _pd.concat(shared._DFS.values(), axis=1,
|
||||
data = _pd.concat(shared._DFS.values(), axis=1, sort=True,
|
||||
keys=shared._DFS.keys())
|
||||
|
||||
# switch names back to isins if applicable
|
||||
@@ -183,11 +194,11 @@ def _download_one_threaded(ticker, start=None, end=None,
|
||||
auto_adjust=False, back_adjust=False,
|
||||
actions=False, progress=True, period="max",
|
||||
interval="1d", prepost=False, proxy=None,
|
||||
rounding=False, timeout=None):
|
||||
keepna=False, rounding=False, timeout=None):
|
||||
|
||||
data = _download_one(ticker, start, end, auto_adjust, back_adjust,
|
||||
actions, period, interval, prepost, proxy, rounding,
|
||||
timeout)
|
||||
keepna, timeout, many=True)
|
||||
shared._DFS[ticker.upper()] = data
|
||||
if progress:
|
||||
shared._PROGRESS_BAR.animate()
|
||||
@@ -197,11 +208,11 @@ def _download_one(ticker, start=None, end=None,
|
||||
auto_adjust=False, back_adjust=False,
|
||||
actions=False, period="max", interval="1d",
|
||||
prepost=False, proxy=None, rounding=False,
|
||||
timeout=None):
|
||||
keepna=False, timeout=None, many=False):
|
||||
|
||||
return Ticker(ticker).history(period=period, interval=interval,
|
||||
start=start, end=end, prepost=prepost,
|
||||
actions=actions, auto_adjust=auto_adjust,
|
||||
back_adjust=back_adjust, proxy=proxy,
|
||||
rounding=rounding, many=True,
|
||||
timeout=timeout)
|
||||
rounding=rounding, keepna=keepna, timeout=timeout,
|
||||
many=many)
|
||||
|
||||
@@ -21,11 +21,30 @@
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import datetime as _datetime
|
||||
import pytz as _tz
|
||||
import requests as _requests
|
||||
import re as _re
|
||||
import pandas as _pd
|
||||
import numpy as _np
|
||||
import sys as _sys
|
||||
import os as _os
|
||||
import appdirs as _ad
|
||||
|
||||
from base64 import b64decode
|
||||
import hashlib
|
||||
usePycryptodome = False # slightly faster
|
||||
# usePycryptodome = True
|
||||
if usePycryptodome:
|
||||
# NOTE: if decide to use 'pycryptodome', set min version to 3.6.6
|
||||
from Crypto.Cipher import AES
|
||||
from Crypto.Util.Padding import unpad
|
||||
else:
|
||||
from cryptography.hazmat.primitives import padding
|
||||
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
|
||||
|
||||
from threading import Lock
|
||||
mutex = Lock()
|
||||
|
||||
try:
|
||||
import ujson as _json
|
||||
@@ -102,24 +121,112 @@ def get_html(url, proxy=None, session=None):
|
||||
return html
|
||||
|
||||
|
||||
|
||||
def decrypt_cryptojs_stores(data):
    """
    Yahoo has started encrypting data stores, this method decrypts it.

    The payload is CryptoJS/OpenSSL "Salted__" AES-CBC ciphertext, base64
    encoded. The passphrase is the hex PBKDF2-HMAC-SHA1 digest of
    ``data["_cs"]`` salted with the bytes packed in ``data["_cr"]``.

    :param data: Python dict of the json data
    :return: The decrypted string data in data['context']['dispatcher']['stores']
    :raises ValueError: if the decoded payload lacks the "Salted__" header
    """

    _cs = data["_cs"]
    # Assumes _cr has format like: '{"words":[-449732894,601032952,157396918,2056341829],"sigBytes":16}';
    _cr = _json.loads(data["_cr"])
    # Each word is a signed 32-bit big-endian integer; concatenated they form the PBKDF2 salt.
    _cr = b"".join(int.to_bytes(i, length=4, byteorder="big", signed=True) for i in _cr["words"])

    password = hashlib.pbkdf2_hmac("sha1", _cs.encode("utf8"), _cr, 1, dklen=32).hex()

    encrypted_stores = data['context']['dispatcher']['stores']
    encrypted_stores = b64decode(encrypted_stores)
    # Explicit check instead of `assert`: assertions vanish under `python -O`,
    # and this validates data received from a remote server.
    if encrypted_stores[0:8] != b"Salted__":
        raise ValueError("Encrypted stores are missing the 'Salted__' header")
    salt = encrypted_stores[8:16]
    encrypted_stores = encrypted_stores[16:]

    key, iv = _EVPKDF(password, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="md5")

    if usePycryptodome:
        cipher = AES.new(key, AES.MODE_CBC, iv=iv)
        plaintext = cipher.decrypt(encrypted_stores)
        plaintext = unpad(plaintext, 16, style="pkcs7")
    else:
        cipher = Cipher(algorithms.AES(key), modes.CBC(iv))
        decryptor = cipher.decryptor()
        plaintext = decryptor.update(encrypted_stores) + decryptor.finalize()
        unpadder = padding.PKCS7(128).unpadder()
        plaintext = unpadder.update(plaintext) + unpadder.finalize()

    # Decode once, after either backend, so the return type is always str.
    plaintext = plaintext.decode("utf-8")

    return plaintext
|
||||
|
||||
def _EVPKDF(password, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="md5") -> tuple:
    """Derive an AES key and IV the way OpenSSL's EVP_BytesToKey does.

    Digest blocks are chained: each block hashes the previous block (if any),
    then the password, then the salt, and is re-hashed ``iterations - 1``
    more times. Blocks are concatenated until ``keySize + ivSize`` bytes
    are available.

    Args:
        password (Union[str, bytes, bytearray]): Password to generate key from;
            a ``str`` is encoded as UTF-8.
        salt (Union[bytes, bytearray]): Salt to use.
        keySize (int, optional): Output key length in bytes. Defaults to 32.
        ivSize (int, optional): Output Initialization Vector (IV) length in bytes. Defaults to 16.
        iterations (int, optional): Number of iterations to perform. Defaults to 1.
        hashAlgorithm (str, optional): Hash algorithm to use for the KDF. Defaults to 'md5'.
    Returns:
        key, iv: Derived key and Initialization Vector (IV) bytes.

    Taken from: https://gist.github.com/rafiibrahim8/0cd0f8c46896cafef6486cb1a50a16d3
    OpenSSL original code: https://github.com/openssl/openssl/blob/master/crypto/evp/evp_key.c#L78
    """

    assert iterations > 0, "Iterations can not be less than 1."

    secret = password.encode("utf-8") if isinstance(password, str) else password

    wanted = keySize + ivSize
    derived = b""
    previous = None

    while len(derived) < wanted:
        digest = hashlib.new(hashAlgorithm)
        # The first block has no predecessor to mix in.
        if previous:
            digest.update(previous)
        digest.update(secret)
        digest.update(salt)
        previous = digest.digest()

        # Extra stretching rounds beyond the first.
        rounds_left = iterations - 1
        while rounds_left > 0:
            previous = hashlib.new(hashAlgorithm, previous).digest()
            rounds_left -= 1

        derived += previous

    return derived[:keySize], derived[keySize:wanted]
|
||||
|
||||
|
||||
def get_json(url, proxy=None, session=None):
|
||||
session = session or _requests
|
||||
html = session.get(url=url, proxies=proxy, headers=user_agent_headers).text
|
||||
|
||||
if "QuoteSummaryStore" not in html:
|
||||
html = session.get(url=url, proxies=proxy).text
|
||||
if "QuoteSummaryStore" not in html:
|
||||
return {}
|
||||
if not "root.App.main =" in html:
|
||||
return {}
|
||||
|
||||
json_str = html.split('root.App.main =')[1].split(
|
||||
'(this)')[0].split(';\n}')[0].strip()
|
||||
data = _json.loads(json_str)[
|
||||
'context']['dispatcher']['stores']['QuoteSummaryStore']
|
||||
data = _json.loads(json_str)
|
||||
|
||||
if "_cs" in data and "_cr" in data:
|
||||
data_stores = _json.loads(decrypt_cryptojs_stores(data))
|
||||
else:
|
||||
if "context" in data and "dispatcher" in data["context"]:
|
||||
# Keep old code, just in case
|
||||
data_stores = data['context']['dispatcher']['stores']
|
||||
else:
|
||||
data_stores = data
|
||||
|
||||
if not 'QuoteSummaryStore' in data_stores:
|
||||
# Problem in data. Either delisted, or Yahoo spam triggered
|
||||
return {}
|
||||
|
||||
data = data_stores['QuoteSummaryStore']
|
||||
# add data about Shares Outstanding for companies' tickers if they are available
|
||||
try:
|
||||
data['annualBasicAverageShares'] = _json.loads(
|
||||
json_str)['context']['dispatcher']['stores'][
|
||||
'QuoteTimeSeriesStore']['timeSeries']['annualBasicAverageShares']
|
||||
data['annualBasicAverageShares'] = \
|
||||
data_stores['QuoteTimeSeriesStore']['timeSeries']['annualBasicAverageShares']
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -135,6 +242,23 @@ def camel2title(o):
|
||||
return [_re.sub("([a-z])([A-Z])", r"\g<1> \g<2>", i).title() for i in o]
|
||||
|
||||
|
||||
def _parse_user_dt(dt, exchange_tz):
    """Convert a user-supplied date/time into an integer epoch timestamp.

    Accepts an int (returned unchanged after a sanity conversion), a
    'YYYY-MM-DD' string, a ``date`` or a ``datetime``. Naive datetimes are
    localized to ``exchange_tz`` before conversion.
    """
    if isinstance(dt, int):
        # Should already be epoch; the conversion is only a validity check.
        _datetime.datetime.fromtimestamp(dt)
        return dt

    # Convert str/date -> datetime, set tzinfo=exchange, get timestamp.
    if isinstance(dt, str):
        dt = _datetime.datetime.strptime(str(dt), '%Y-%m-%d')

    is_plain_date = isinstance(dt, _datetime.date) and not isinstance(dt, _datetime.datetime)
    if is_plain_date:
        dt = _datetime.datetime.combine(dt, _datetime.time(0))

    if isinstance(dt, _datetime.datetime) and dt.tzinfo is None:
        # Assume user is referring to exchange's timezone
        dt = _tz.timezone(exchange_tz).localize(dt)

    return int(dt.timestamp())
|
||||
|
||||
|
||||
def auto_adjust(data):
|
||||
df = data.copy()
|
||||
ratio = df["Close"] / df["Adj Close"]
|
||||
@@ -176,7 +300,7 @@ def back_adjust(data):
|
||||
return df[["Open", "High", "Low", "Close", "Volume"]]
|
||||
|
||||
|
||||
def parse_quotes(data, tz=None):
|
||||
def parse_quotes(data):
|
||||
timestamps = data["timestamp"]
|
||||
ohlc = data["indicators"]["quote"][0]
|
||||
volumes = ohlc["volume"]
|
||||
@@ -199,13 +323,10 @@ def parse_quotes(data, tz=None):
|
||||
quotes.index = _pd.to_datetime(timestamps, unit="s")
|
||||
quotes.sort_index(inplace=True)
|
||||
|
||||
if tz is not None:
|
||||
quotes.index = quotes.index.tz_localize(tz)
|
||||
|
||||
return quotes
|
||||
|
||||
|
||||
def parse_actions(data, tz=None):
|
||||
def parse_actions(data):
|
||||
dividends = _pd.DataFrame(
|
||||
columns=["Dividends"], index=_pd.DatetimeIndex([]))
|
||||
splits = _pd.DataFrame(
|
||||
@@ -218,8 +339,6 @@ def parse_actions(data, tz=None):
|
||||
dividends.set_index("date", inplace=True)
|
||||
dividends.index = _pd.to_datetime(dividends.index, unit="s")
|
||||
dividends.sort_index(inplace=True)
|
||||
if tz is not None:
|
||||
dividends.index = dividends.index.tz_localize(tz)
|
||||
|
||||
dividends.columns = ["Dividends"]
|
||||
|
||||
@@ -229,8 +348,6 @@ def parse_actions(data, tz=None):
|
||||
splits.set_index("date", inplace=True)
|
||||
splits.index = _pd.to_datetime(splits.index, unit="s")
|
||||
splits.sort_index(inplace=True)
|
||||
if tz is not None:
|
||||
splits.index = splits.index.tz_localize(tz)
|
||||
splits["Stock Splits"] = splits["numerator"] / \
|
||||
splits["denominator"]
|
||||
splits = splits["Stock Splits"]
|
||||
@@ -238,6 +355,19 @@ def parse_actions(data, tz=None):
|
||||
return dividends, splits
|
||||
|
||||
|
||||
def fix_Yahoo_dst_issue(df, interval):
    """Shift daily/weekly rows stamped just before midnight onto the next midnight.

    :param df: DataFrame with a DatetimeIndex; its index is updated in place.
    :param interval: Interval string; only "1d", "1w" and "1wk" are adjusted.
    :return: The same DataFrame object.
    """
    if interval in ["1d", "1w", "1wk"]:
        # These intervals should start at time 00:00. But for some combinations of date and timezone,
        # Yahoo has time off by few hours (e.g. Brazil 23:00 around Jan-2022). Suspect DST problem.
        # The clue is (a) minutes=0 and (b) hour near 0.
        # Obviously Yahoo meant 00:00, so ensure this doesn't affect date conversion:
        f_pre_midnight = (df.index.minute == 0) & (df.index.hour.isin([22, 23]))
        dst_error_hours = _np.zeros(df.shape[0], dtype=int)
        dst_error_hours[f_pre_midnight] = 24 - df.index[f_pre_midnight].hour
        # to_timedelta(unit=...) replaces TimedeltaIndex(values, 'h'), whose
        # `unit` argument is deprecated in recent pandas releases.
        df.index += _pd.to_timedelta(dst_error_hours, unit='h')
    return df
|
||||
|
||||
|
||||
class ProgressBar:
|
||||
def __init__(self, iterations, text='completed'):
|
||||
self.text = text
|
||||
@@ -286,3 +416,55 @@ class ProgressBar:
|
||||
|
||||
def __str__(self):
|
||||
return str(self.prog_bar)
|
||||
|
||||
|
||||
# Simple file cache of ticker->timezone:
# Optional user override for the cache's parent directory (None = platform default).
_cache_dp = None


def get_cache_dirpath():
    """Return the "py-yfinance" cache directory path.

    The parent is the override stored in ``_cache_dp`` when set, otherwise
    the per-user cache directory reported by appdirs.
    """
    base_dir = _ad.user_cache_dir() if _cache_dp is None else _cache_dp
    return _os.path.join(base_dir, "py-yfinance")
|
||||
def set_tz_cache_location(dp):
    """Override the parent directory under which the timezone cache is kept.

    :param dp: Directory path stored in the module-level ``_cache_dp``.
    """
    global _cache_dp
    _cache_dp = dp
|
||||
|
||||
def cache_lookup_tkr_tz(tkr):
    """Look up a ticker's cached timezone in the "tkr-tz.csv" cache file.

    :param tkr: Ticker symbol used as the CSV index key.
    :return: The cached "Tz" value, or None if the cache file does not exist
        or has no row for ``tkr``.
    """
    fp = _os.path.join(get_cache_dirpath(), "tkr-tz.csv")
    if not _os.path.isfile(fp):
        return None

    # `with` releases the lock even if read_csv raises; a bare
    # acquire()/release() pair would leave it held and block later callers.
    with mutex:
        df = _pd.read_csv(fp, index_col="Ticker", on_bad_lines="skip")

    if tkr in df.index:
        return df.loc[tkr, "Tz"]
    return None
|
||||
def cache_store_tkr_tz(tkr, tz):
    """Store, or with ``tz=None`` delete, a ticker's timezone in the CSV cache.

    :param tkr: Ticker symbol used as the CSV index key.
    :param tz: Timezone value to store; None removes any existing entry.
    :raises Exception: if ``tz`` is given but ``tkr`` is already cached.
    """
    dp = get_cache_dirpath()
    fp = _os.path.join(dp, "tkr-tz.csv")

    # Hold the lock via `with` so it is released on every exit path,
    # including the "already in cache" exception and any I/O error.
    with mutex:
        _os.makedirs(dp, exist_ok=True)

        if not _os.path.isfile(fp):
            if tz is None:
                # No cache file yet, so there is nothing to delete.
                return
            df = _pd.DataFrame({"Tz": [tz]}, index=[tkr])
            df.index.name = "Ticker"
            df.to_csv(fp)
            return

        df = _pd.read_csv(fp, index_col="Ticker", on_bad_lines="skip")
        if tz is None:
            # Delete if in cache:
            if tkr in df.index:
                df.drop(tkr).to_csv(fp)
            return

        if tkr in df.index:
            raise Exception("Tkr {} tz already in cache".format(tkr))
        df.loc[tkr, "Tz"] = tz
        df.to_csv(fp)
|
||||
|
||||
|
||||
@@ -1 +1 @@
|
||||
version = "0.1.74"
|
||||
version = "0.1.96"
|
||||
|
||||
Reference in New Issue
Block a user