Compare commits
82 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a197d9f78e | ||
|
|
dbb9bbfbf3 | ||
|
|
a7b053addd | ||
|
|
e8ca256c10 | ||
|
|
f651dd1e93 | ||
|
|
f40cf0aae1 | ||
|
|
200f57c458 | ||
|
|
e5d45eaa85 | ||
|
|
42b77a9b54 | ||
|
|
bca005a2c0 | ||
|
|
ca891bb187 | ||
|
|
0939ff3c78 | ||
|
|
6f5c5635be | ||
|
|
809622e426 | ||
|
|
eec1f3dbad | ||
|
|
1de789ad72 | ||
|
|
cd68ff68c6 | ||
|
|
9673970f45 | ||
|
|
6ea69a70ac | ||
|
|
c723a5ab44 | ||
|
|
50741d1409 | ||
|
|
69d0dcd62b | ||
|
|
5c9348f255 | ||
|
|
a472546e7b | ||
|
|
c914f1f183 | ||
|
|
92c82342fe | ||
|
|
7ae08b04f3 | ||
|
|
4b50f1e81c | ||
|
|
1ed58be749 | ||
|
|
375b4f9376 | ||
|
|
b6b4426ca9 | ||
|
|
149ebe46db | ||
|
|
d80b27cfde | ||
|
|
36e277317b | ||
|
|
0e1ea4d2c6 | ||
|
|
2d96c383ef | ||
|
|
ec6279736b | ||
|
|
5d942d9668 | ||
|
|
5782cb59fd | ||
|
|
4c4861a8f1 | ||
|
|
4d221ca70e | ||
|
|
1a8d045baf | ||
|
|
67a55c35ce | ||
|
|
e547fe4e41 | ||
|
|
9d5366d707 | ||
|
|
4b07d1dceb | ||
|
|
9440c1e1c1 | ||
|
|
773d003a67 | ||
|
|
a2905a0f8d | ||
|
|
1810455e15 | ||
|
|
9770f286b4 | ||
|
|
80eb0ddafb | ||
|
|
a9e9e8dcb3 | ||
|
|
69c0673345 | ||
|
|
8077cabf44 | ||
|
|
e35b98ef1d | ||
|
|
dc40c6d093 | ||
|
|
8572628ba6 | ||
|
|
f878ce5ea5 | ||
|
|
bbf68daf3f | ||
|
|
afbb5d81a4 | ||
|
|
670ec8e766 | ||
|
|
8fa9438072 | ||
|
|
8c56a0c3df | ||
|
|
6e90fc17cb | ||
|
|
0849f68dd8 | ||
|
|
77f3810cdd | ||
|
|
f3583b00e3 | ||
|
|
d0606cdb03 | ||
|
|
76a9b09e8e | ||
|
|
d757b8f25f | ||
|
|
ab50edf24c | ||
|
|
9b4bce7d06 | ||
|
|
a396d93601 | ||
|
|
79d6741f1e | ||
|
|
47a119f63b | ||
|
|
436c077ee2 | ||
|
|
72b1e9699e | ||
|
|
d2c2690cc2 | ||
|
|
65c1753fb2 | ||
|
|
93edc3c163 | ||
|
|
b2e2acad06 |
@@ -1,6 +1,41 @@
|
||||
Change Log
|
||||
===========
|
||||
|
||||
0.1.75
|
||||
------
|
||||
|
||||
- Fixed datetime-related issues: #1048
|
||||
- Add 'keepna' argument #1032
|
||||
- Speedup Ticker() creation #1042
|
||||
- Improve a bugfix #1033
|
||||
|
||||
0.1.74
|
||||
------
|
||||
- Fixed bug introduced in 0.1.73 (sorry :/)
|
||||
|
||||
0.1.73
|
||||
------
|
||||
- Merged several PRs that fixed misc issues
|
||||
|
||||
0.1.72
|
||||
------
|
||||
- Misc bugfixes
|
||||
|
||||
0.1.71
|
||||
------
|
||||
- Added Tickers(…).news()
|
||||
- Return empty DF if YF missing earnings dates
|
||||
- Fix EPS % to 0->1
|
||||
- Fix timezone handling
|
||||
- Fix handling of missing data
|
||||
- Clean&format earnings_dates table
|
||||
- Add ``.get_earnings_dates()`` to retrieve earnings calendar
|
||||
- Added ``.get_earnings_history()`` to fetch earnings data
|
||||
|
||||
0.1.70
|
||||
------
|
||||
- Bug fixed - Closes #937
|
||||
|
||||
0.1.69
|
||||
------
|
||||
- Bug fixed - #920
|
||||
|
||||
@@ -101,6 +101,9 @@ msft.recommendations
|
||||
# show next event (earnings, etc)
|
||||
msft.calendar
|
||||
|
||||
# show all earnings dates
|
||||
msft.earnings_dates
|
||||
|
||||
# show ISIN code - *experimental*
|
||||
# ISIN = International Securities Identification Number
|
||||
msft.isin
|
||||
@@ -261,6 +264,7 @@ To install `yfinance` using `conda`, see
|
||||
- [Numpy](http://www.numpy.org) \>= 1.11.1
|
||||
- [requests](http://docs.python-requests.org/en/master/) \>= 2.14.2
|
||||
- [lxml](https://pypi.org/project/lxml/) \>= 4.5.1
|
||||
- [appdirs](https://pypi.org/project/appdirs) \>=1.4.4
|
||||
|
||||
### Optional (if you want to use `pandas_datareader`)
|
||||
|
||||
|
||||
@@ -21,6 +21,7 @@ requirements:
|
||||
- requests >=2.21
|
||||
- multitasking >=0.0.7
|
||||
- lxml >=4.5.1
|
||||
- appdirs >= 1.4.4
|
||||
- pip
|
||||
- python
|
||||
|
||||
@@ -30,6 +31,7 @@ requirements:
|
||||
- requests >=2.21
|
||||
- multitasking >=0.0.7
|
||||
- lxml >=4.5.1
|
||||
- appdirs >= 1.4.4
|
||||
- python
|
||||
|
||||
test:
|
||||
|
||||
@@ -3,3 +3,4 @@ numpy>=1.16.5
|
||||
requests>=2.26
|
||||
multitasking>=0.0.7
|
||||
lxml>=4.5.1
|
||||
appdirs>=1.4.4
|
||||
|
||||
4
setup.py
4
setup.py
@@ -61,9 +61,9 @@ setup(
|
||||
platforms=['any'],
|
||||
keywords='pandas, yahoo finance, pandas datareader',
|
||||
packages=find_packages(exclude=['contrib', 'docs', 'tests', 'examples']),
|
||||
install_requires=['pandas>=0.24', 'numpy>=1.15',
|
||||
install_requires=['pandas>=0.24.0', 'numpy>=1.15',
|
||||
'requests>=2.26', 'multitasking>=0.0.7',
|
||||
'lxml>=4.5.1'],
|
||||
'lxml>=4.5.1', 'appdirs>=1.4.4'],
|
||||
entry_points={
|
||||
'console_scripts': [
|
||||
'sample=sample:main',
|
||||
|
||||
@@ -52,6 +52,8 @@ class TestTicker(unittest.TestCase):
|
||||
ticker.options
|
||||
ticker.news
|
||||
ticker.shares
|
||||
ticker.earnings_history
|
||||
ticker.earnings_dates
|
||||
|
||||
def test_holders(self):
|
||||
for ticker in tickers:
|
||||
|
||||
374
yfinance/base.py
374
yfinance/base.py
@@ -23,6 +23,7 @@ from __future__ import print_function
|
||||
|
||||
import time as _time
|
||||
import datetime as _datetime
|
||||
import pytz as _tz
|
||||
import requests as _requests
|
||||
import pandas as _pd
|
||||
import numpy as _np
|
||||
@@ -43,6 +44,8 @@ from . import shared
|
||||
|
||||
_BASE_URL_ = 'https://query2.finance.yahoo.com'
|
||||
_SCRAPE_URL_ = 'https://finance.yahoo.com/quote'
|
||||
_ROOT_URL_ = 'https://finance.yahoo.com'
|
||||
|
||||
|
||||
class TickerBase():
|
||||
def __init__(self, ticker, session=None):
|
||||
@@ -51,6 +54,7 @@ class TickerBase():
|
||||
self._history = None
|
||||
self._base_url = _BASE_URL_
|
||||
self._scrape_url = _SCRAPE_URL_
|
||||
self._tz = None
|
||||
|
||||
self._fundamentals = False
|
||||
self._info = None
|
||||
@@ -66,19 +70,13 @@ class TickerBase():
|
||||
|
||||
self._calendar = None
|
||||
self._expirations = {}
|
||||
self._earnings_dates = None
|
||||
self._earnings_history = None
|
||||
|
||||
self._earnings = {
|
||||
"yearly": utils.empty_df(),
|
||||
"quarterly": utils.empty_df()}
|
||||
self._financials = {
|
||||
"yearly": utils.empty_df(),
|
||||
"quarterly": utils.empty_df()}
|
||||
self._balancesheet = {
|
||||
"yearly": utils.empty_df(),
|
||||
"quarterly": utils.empty_df()}
|
||||
self._cashflow = {
|
||||
"yearly": utils.empty_df(),
|
||||
"quarterly": utils.empty_df()}
|
||||
self._earnings = None
|
||||
self._financials = None
|
||||
self._balancesheet = None
|
||||
self._cashflow = None
|
||||
|
||||
# accept isin as ticker
|
||||
if utils.is_isin(self.ticker):
|
||||
@@ -102,8 +100,8 @@ class TickerBase():
|
||||
|
||||
def history(self, period="1mo", interval="1d",
|
||||
start=None, end=None, prepost=False, actions=True,
|
||||
auto_adjust=True, back_adjust=False,
|
||||
proxy=None, rounding=False, tz=None, timeout=None, **kwargs):
|
||||
auto_adjust=True, back_adjust=False, keepna=False,
|
||||
proxy=None, rounding=False, timeout=None, **kwargs):
|
||||
"""
|
||||
:Parameters:
|
||||
period : str
|
||||
@@ -125,14 +123,14 @@ class TickerBase():
|
||||
Adjust all OHLC automatically? Default is True
|
||||
back_adjust: bool
|
||||
Back-adjusted data to mimic true historical prices
|
||||
keepna: bool
|
||||
Keep NaN rows returned by Yahoo?
|
||||
Default is False
|
||||
proxy: str
|
||||
Optional. Proxy server URL scheme. Default is None
|
||||
rounding: bool
|
||||
Round values to 2 decimal places?
|
||||
Optional. Default is False = precision suggested by Yahoo!
|
||||
tz: str
|
||||
Optional timezone locale for dates.
|
||||
(default data is returned as non-localized dates)
|
||||
timeout: None or float
|
||||
If not None stops waiting for a response after given number of
|
||||
seconds. (Can also be a fraction of a second e.g. 0.01)
|
||||
@@ -144,20 +142,17 @@ class TickerBase():
|
||||
"""
|
||||
|
||||
if start or period is None or period.lower() == "max":
|
||||
if start is None:
|
||||
start = -631159200
|
||||
elif isinstance(start, _datetime.datetime):
|
||||
start = int(_time.mktime(start.timetuple()))
|
||||
else:
|
||||
start = int(_time.mktime(
|
||||
_time.strptime(str(start), '%Y-%m-%d')))
|
||||
if end is None:
|
||||
end = int(_time.time())
|
||||
elif isinstance(end, _datetime.datetime):
|
||||
end = int(_time.mktime(end.timetuple()))
|
||||
else:
|
||||
end = int(_time.mktime(_time.strptime(str(end), '%Y-%m-%d')))
|
||||
|
||||
end = utils._parse_user_dt(end, self._get_ticker_tz())
|
||||
if start is None:
|
||||
if interval == "1m":
|
||||
start = end - 604800 # Subtract 7 days
|
||||
else:
|
||||
start = -631159200
|
||||
else:
|
||||
start = utils._parse_user_dt(start, self._get_ticker_tz())
|
||||
params = {"period1": start, "period2": end}
|
||||
else:
|
||||
period = period.lower()
|
||||
@@ -181,7 +176,6 @@ class TickerBase():
|
||||
url = "{}/v8/finance/chart/{}".format(self._base_url, self.ticker)
|
||||
|
||||
session = self.session or _requests
|
||||
|
||||
data = None
|
||||
|
||||
try:
|
||||
@@ -233,7 +227,12 @@ class TickerBase():
|
||||
|
||||
# parse quotes
|
||||
try:
|
||||
quotes = utils.parse_quotes(data["chart"]["result"][0], tz)
|
||||
quotes = utils.parse_quotes(data["chart"]["result"][0])
|
||||
# Yahoo bug fix - it often appends latest price even if after end date
|
||||
if end and not quotes.empty:
|
||||
endDt = _pd.to_datetime(_datetime.datetime.utcfromtimestamp(end))
|
||||
if quotes.index[quotes.shape[0]-1] >= endDt:
|
||||
quotes = quotes.iloc[0:quotes.shape[0]-1]
|
||||
except Exception:
|
||||
shared._DFS[self.ticker] = utils.empty_df()
|
||||
shared._ERRORS[self.ticker] = err_msg
|
||||
@@ -281,10 +280,13 @@ class TickerBase():
|
||||
"chart"]["result"][0]["meta"]["priceHint"])
|
||||
quotes['Volume'] = quotes['Volume'].fillna(0).astype(_np.int64)
|
||||
|
||||
quotes.dropna(inplace=True)
|
||||
if not keepna:
|
||||
quotes.dropna(inplace=True)
|
||||
|
||||
# actions
|
||||
dividends, splits = utils.parse_actions(data["chart"]["result"][0], tz)
|
||||
dividends, splits = utils.parse_actions(data["chart"]["result"][0])
|
||||
|
||||
tz_exchange = data["chart"]["result"][0]["meta"]["exchangeTimezoneName"]
|
||||
|
||||
# combine
|
||||
df = _pd.concat([quotes, dividends, splits], axis=1, sort=True)
|
||||
@@ -292,17 +294,16 @@ class TickerBase():
|
||||
df["Stock Splits"].fillna(0, inplace=True)
|
||||
|
||||
# index eod/intraday
|
||||
df.index = df.index.tz_localize("UTC").tz_convert(
|
||||
data["chart"]["result"][0]["meta"]["exchangeTimezoneName"])
|
||||
df.index = df.index.tz_localize("UTC").tz_convert(tz_exchange)
|
||||
|
||||
df = utils.fix_Yahoo_dst_issue(df, params["interval"])
|
||||
|
||||
if params["interval"][-1] == "m":
|
||||
df.index.name = "Datetime"
|
||||
elif params["interval"] == "1h":
|
||||
pass
|
||||
else:
|
||||
df.index = _pd.to_datetime(df.index.date)
|
||||
if tz is not None:
|
||||
df.index = df.index.tz_localize(tz)
|
||||
df.index = _pd.to_datetime(df.index.date).tz_localize(tz_exchange)
|
||||
df.index.name = "Date"
|
||||
|
||||
# duplicates and missing rows cleanup
|
||||
@@ -318,32 +319,30 @@ class TickerBase():
|
||||
|
||||
# ------------------------
|
||||
|
||||
def _get_fundamentals(self, kind=None, proxy=None):
|
||||
def cleanup(data):
|
||||
df = _pd.DataFrame(data).drop(columns=['maxAge'])
|
||||
for col in df.columns:
|
||||
df[col] = _np.where(
|
||||
df[col].astype(str) == '-', _np.nan, df[col])
|
||||
def _get_ticker_tz(self):
|
||||
if not self._tz is None:
|
||||
return self._tz
|
||||
|
||||
df.set_index('endDate', inplace=True)
|
||||
try:
|
||||
df.index = _pd.to_datetime(df.index, unit='s')
|
||||
except ValueError:
|
||||
df.index = _pd.to_datetime(df.index)
|
||||
df = df.T
|
||||
df.columns.name = ''
|
||||
df.index.name = 'Breakdown'
|
||||
tkr_tz = utils.cache_lookup_tkr_tz(self.ticker)
|
||||
if tkr_tz is None:
|
||||
tkr_tz = self.info["exchangeTimezoneName"]
|
||||
# info fetch is relatively slow so cache timezone
|
||||
utils.cache_store_tkr_tz(self.ticker, tkr_tz)
|
||||
|
||||
df.index = utils.camel2title(df.index)
|
||||
return df
|
||||
self._tz = tkr_tz
|
||||
return tkr_tz
|
||||
|
||||
def _get_info(self, proxy=None):
|
||||
# setup proxy in requests format
|
||||
if proxy is not None:
|
||||
if isinstance(proxy, dict) and "https" in proxy:
|
||||
proxy = proxy["https"]
|
||||
proxy = {"https": proxy}
|
||||
|
||||
if self._fundamentals:
|
||||
if (self._info is None) or (self._sustainability is None) or (self._recommendations is None):
|
||||
## Need to fetch
|
||||
pass
|
||||
else:
|
||||
return
|
||||
|
||||
ticker_url = "{}/{}".format(self._scrape_url, self.ticker)
|
||||
@@ -351,42 +350,6 @@ class TickerBase():
|
||||
# get info and sustainability
|
||||
data = utils.get_json(ticker_url, proxy, self.session)
|
||||
|
||||
# holders
|
||||
try:
|
||||
resp = utils.get_html(ticker_url + '/holders', proxy, self.session)
|
||||
holders = _pd.read_html(resp)
|
||||
except Exception:
|
||||
holders = []
|
||||
|
||||
if len(holders) >= 3:
|
||||
self._major_holders = holders[0]
|
||||
self._institutional_holders = holders[1]
|
||||
self._mutualfund_holders = holders[2]
|
||||
elif len(holders) >= 2:
|
||||
self._major_holders = holders[0]
|
||||
self._institutional_holders = holders[1]
|
||||
elif len(holders) >= 1:
|
||||
self._major_holders = holders[0]
|
||||
|
||||
# self._major_holders = holders[0]
|
||||
# self._institutional_holders = holders[1]
|
||||
|
||||
if self._institutional_holders is not None:
|
||||
if 'Date Reported' in self._institutional_holders:
|
||||
self._institutional_holders['Date Reported'] = _pd.to_datetime(
|
||||
self._institutional_holders['Date Reported'])
|
||||
if '% Out' in self._institutional_holders:
|
||||
self._institutional_holders['% Out'] = self._institutional_holders[
|
||||
'% Out'].str.replace('%', '').astype(float) / 100
|
||||
|
||||
if self._mutualfund_holders is not None:
|
||||
if 'Date Reported' in self._mutualfund_holders:
|
||||
self._mutualfund_holders['Date Reported'] = _pd.to_datetime(
|
||||
self._mutualfund_holders['Date Reported'])
|
||||
if '% Out' in self._mutualfund_holders:
|
||||
self._mutualfund_holders['% Out'] = self._mutualfund_holders[
|
||||
'% Out'].str.replace('%', '').astype(float) / 100
|
||||
|
||||
# sustainability
|
||||
d = {}
|
||||
try:
|
||||
@@ -417,7 +380,7 @@ class TickerBase():
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# For ETFs, provide this valuable data: the top holdings of the ETF
|
||||
# For ETFs, provide this valuable data: the top holdings of the ETF
|
||||
try:
|
||||
if 'topHoldings' in data:
|
||||
self._info.update(data['topHoldings'])
|
||||
@@ -478,10 +441,85 @@ class TickerBase():
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _get_fundamentals(self, proxy=None):
|
||||
def cleanup(data):
|
||||
df = _pd.DataFrame(data).drop(columns=['maxAge'])
|
||||
for col in df.columns:
|
||||
df[col] = _np.where(
|
||||
df[col].astype(str) == '-', _np.nan, df[col])
|
||||
|
||||
df.set_index('endDate', inplace=True)
|
||||
try:
|
||||
df.index = _pd.to_datetime(df.index, unit='s')
|
||||
except ValueError:
|
||||
df.index = _pd.to_datetime(df.index)
|
||||
df = df.T
|
||||
df.columns.name = ''
|
||||
df.index.name = 'Breakdown'
|
||||
|
||||
# rename incorrect yahoo key
|
||||
df.rename(index={'treasuryStock': 'Gains Losses Not Affecting Retained Earnings'}, inplace=True)
|
||||
|
||||
df.index = utils.camel2title(df.index)
|
||||
return df
|
||||
|
||||
# setup proxy in requests format
|
||||
if proxy is not None:
|
||||
if isinstance(proxy, dict) and "https" in proxy:
|
||||
proxy = proxy["https"]
|
||||
proxy = {"https": proxy}
|
||||
|
||||
if self._fundamentals:
|
||||
return
|
||||
|
||||
ticker_url = "{}/{}".format(self._scrape_url, self.ticker)
|
||||
|
||||
# holders
|
||||
try:
|
||||
resp = utils.get_html(ticker_url + '/holders', proxy, self.session)
|
||||
holders = _pd.read_html(resp)
|
||||
except Exception:
|
||||
holders = []
|
||||
|
||||
if len(holders) >= 3:
|
||||
self._major_holders = holders[0]
|
||||
self._institutional_holders = holders[1]
|
||||
self._mutualfund_holders = holders[2]
|
||||
elif len(holders) >= 2:
|
||||
self._major_holders = holders[0]
|
||||
self._institutional_holders = holders[1]
|
||||
elif len(holders) >= 1:
|
||||
self._major_holders = holders[0]
|
||||
|
||||
# self._major_holders = holders[0]
|
||||
# self._institutional_holders = holders[1]
|
||||
|
||||
if self._institutional_holders is not None:
|
||||
if 'Date Reported' in self._institutional_holders:
|
||||
self._institutional_holders['Date Reported'] = _pd.to_datetime(
|
||||
self._institutional_holders['Date Reported'])
|
||||
if '% Out' in self._institutional_holders:
|
||||
self._institutional_holders['% Out'] = self._institutional_holders[
|
||||
'% Out'].str.replace('%', '').astype(float) / 100
|
||||
|
||||
if self._mutualfund_holders is not None:
|
||||
if 'Date Reported' in self._mutualfund_holders:
|
||||
self._mutualfund_holders['Date Reported'] = _pd.to_datetime(
|
||||
self._mutualfund_holders['Date Reported'])
|
||||
if '% Out' in self._mutualfund_holders:
|
||||
self._mutualfund_holders['% Out'] = self._mutualfund_holders[
|
||||
'% Out'].str.replace('%', '').astype(float) / 100
|
||||
|
||||
self._get_info(proxy)
|
||||
|
||||
# get fundamentals
|
||||
data = utils.get_json(ticker_url + '/financials', proxy, self.session)
|
||||
|
||||
# generic patterns
|
||||
self._earnings = {"yearly": utils.empty_df(), "quarterly": utils.empty_df()}
|
||||
self._cashflow = {"yearly": utils.empty_df(), "quarterly": utils.empty_df()}
|
||||
self._balancesheet = {"yearly": utils.empty_df(), "quarterly": utils.empty_df()}
|
||||
self._financials = {"yearly": utils.empty_df(), "quarterly": utils.empty_df()}
|
||||
for key in (
|
||||
(self._cashflow, 'cashflowStatement', 'cashflowStatements'),
|
||||
(self._balancesheet, 'balanceSheet', 'balanceSheetStatements'),
|
||||
@@ -521,11 +559,15 @@ class TickerBase():
|
||||
|
||||
# shares outstanding
|
||||
try:
|
||||
shares = _pd.DataFrame(data['annualBasicAverageShares'])
|
||||
# keep only years with non None data
|
||||
available_shares = [shares_data for shares_data in data['annualBasicAverageShares'] if shares_data]
|
||||
shares = _pd.DataFrame(available_shares)
|
||||
shares['Year'] = shares['asOfDate'].agg(lambda x: int(x[:4]))
|
||||
shares.set_index('Year', inplace=True)
|
||||
shares.drop(columns=['dataId', 'asOfDate', 'periodType', 'currencyCode'], inplace=True)
|
||||
shares.rename(columns={'reportedValue': "BasicShares"}, inplace=True)
|
||||
shares.drop(columns=['dataId', 'asOfDate',
|
||||
'periodType', 'currencyCode'], inplace=True)
|
||||
shares.rename(
|
||||
columns={'reportedValue': "BasicShares"}, inplace=True)
|
||||
self._shares = shares
|
||||
except Exception:
|
||||
pass
|
||||
@@ -549,22 +591,27 @@ class TickerBase():
|
||||
if isinstance(colval, dict):
|
||||
dict_cols.append(colname)
|
||||
for k, v in colval.items():
|
||||
new_colname = colname + ' ' + utils.camel2title([k])[0]
|
||||
new_colname = colname + ' ' + \
|
||||
utils.camel2title([k])[0]
|
||||
analysis.loc[idx, new_colname] = v
|
||||
|
||||
self._analysis = analysis[[c for c in analysis.columns if c not in dict_cols]]
|
||||
self._analysis = analysis[[
|
||||
c for c in analysis.columns if c not in dict_cols]]
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Complementary key-statistics (currently fetching the important trailingPegRatio which is the value shown in the website)
|
||||
res = {}
|
||||
try:
|
||||
my_headers = {'user-agent': 'curl/7.55.1', 'accept': 'application/json', 'content-type': 'application/json', 'referer': 'https://finance.yahoo.com/', 'cache-control': 'no-cache', 'connection': 'close'}
|
||||
my_headers = {'user-agent': 'curl/7.55.1', 'accept': 'application/json', 'content-type': 'application/json',
|
||||
'referer': 'https://finance.yahoo.com/', 'cache-control': 'no-cache', 'connection': 'close'}
|
||||
p = _re.compile(r'root\.App\.main = (.*);')
|
||||
r = _requests.session().get('https://finance.yahoo.com/quote/{}/key-statistics?p={}'.format(self.ticker, self.ticker), headers=my_headers)
|
||||
r = _requests.session().get('https://finance.yahoo.com/quote/{}/key-statistics?p={}'.format(self.ticker,
|
||||
self.ticker), headers=my_headers)
|
||||
q_results = {}
|
||||
my_qs_keys = ['pegRatio'] # QuoteSummaryStore
|
||||
my_ts_keys = ['trailingPegRatio'] # , 'quarterlyPegRatio'] # QuoteTimeSeriesStore
|
||||
# , 'quarterlyPegRatio'] # QuoteTimeSeriesStore
|
||||
my_ts_keys = ['trailingPegRatio']
|
||||
|
||||
# Complementary key-statistics
|
||||
data = _json.loads(p.findall(r.text)[0])
|
||||
@@ -578,7 +625,8 @@ class TickerBase():
|
||||
zzz = key_stats['timeSeries'][i]
|
||||
for j in range(len(zzz)):
|
||||
if key_stats['timeSeries'][i][j]:
|
||||
res = {i: key_stats['timeSeries'][i][j]['reportedValue']['raw']}
|
||||
res = {i: key_stats['timeSeries']
|
||||
[i][j]['reportedValue']['raw']}
|
||||
q_results[self.ticker].append(res)
|
||||
|
||||
# print(res)
|
||||
@@ -597,14 +645,14 @@ class TickerBase():
|
||||
self._fundamentals = True
|
||||
|
||||
def get_recommendations(self, proxy=None, as_dict=False, *args, **kwargs):
|
||||
self._get_fundamentals(proxy=proxy)
|
||||
self._get_info(proxy)
|
||||
data = self._recommendations
|
||||
if as_dict:
|
||||
return data.to_dict()
|
||||
return data
|
||||
|
||||
def get_calendar(self, proxy=None, as_dict=False, *args, **kwargs):
|
||||
self._get_fundamentals(proxy=proxy)
|
||||
self._get_info(proxy)
|
||||
data = self._calendar
|
||||
if as_dict:
|
||||
return data.to_dict()
|
||||
@@ -634,14 +682,14 @@ class TickerBase():
|
||||
return data
|
||||
|
||||
def get_info(self, proxy=None, as_dict=False, *args, **kwargs):
|
||||
self._get_fundamentals(proxy=proxy)
|
||||
self._get_info(proxy)
|
||||
data = self._info
|
||||
if as_dict:
|
||||
return data.to_dict()
|
||||
return data
|
||||
|
||||
def get_sustainability(self, proxy=None, as_dict=False, *args, **kwargs):
|
||||
self._get_fundamentals(proxy=proxy)
|
||||
self._get_info(proxy)
|
||||
data = self._sustainability
|
||||
if as_dict:
|
||||
return data.to_dict()
|
||||
@@ -791,3 +839,121 @@ class TickerBase():
|
||||
# parse news
|
||||
self._news = data.get("news", [])
|
||||
return self._news
|
||||
|
||||
def get_earnings_dates(self, proxy=None):
    """
    Download and parse the earnings-dates table for this ticker.

    The earnings calendar is requested page by page (100 rows per
    request) until a page contains no HTML table. The parsed result is
    stored on the instance and returned as-is on later calls.

    :Parameters:
        proxy: str or dict
            Optional. Proxy URL, or a dict holding it under "https".
            Default is None

    :Returns:
        pandas.DataFrame indexed by "Earnings Date" (localized to the
        exchange timezone taken from ``self.info``), with the numeric
        columns "EPS Estimate", "Reported EPS" and "Surprise(%)"
        (the latter scaled to the range 0->1).

    :Raises:
        RuntimeError
            If the response contains "Will be right back".
        Exception
            If no data was found for the symbol.
    """
    cached = self._earnings_dates
    if cached is not None:
        return cached

    # setup proxy in requests format
    if proxy is not None:
        if isinstance(proxy, dict) and "https" in proxy:
            proxy = proxy["https"]
        proxy = {"https": proxy}

    page_size = 100  # YF caps at 100, don't go higher
    session = self.session or _requests

    dates = None
    page_offset = 0
    while True:
        url = "{}/calendar/earnings?symbol={}&offset={}&size={}".format(
            _ROOT_URL_, self.ticker, page_offset, page_size)
        html = session.get(
            url=url,
            proxies=proxy,
            headers=utils.user_agent_headers
        ).text

        if "Will be right back" in html:
            raise RuntimeError("*** YAHOO! FINANCE IS CURRENTLY DOWN! ***\n"
                               "Our engineers are working quickly to resolve "
                               "the issue. Thank you for your patience.")

        try:
            page = _pd.read_html(html)[0]
        except ValueError:
            # read_html found no table: either paging is exhausted or,
            # on the very first page, the company simply has no
            # earnings history although the request itself succeeded.
            if page_offset == 0 and "Showing Earnings for:" in html:
                dates = utils.empty_earnings_dates_df()
            break

        if dates is None:
            dates = page
        else:
            dates = _pd.concat([dates, page], axis=0)
        page_offset += page_size

    if dates is None:
        raise Exception("No data found, symbol may be delisted")

    # Renumber the rows, then drop the redundant columns
    dates = dates.reset_index(drop=True)
    dates = dates.drop(["Symbol", "Company"], axis=1)

    # Convert types ('-' marks a missing value)
    for col in ["EPS Estimate", "Reported EPS", "Surprise(%)"]:
        dates.loc[dates[col] == '-', col] = "NaN"
        dates[col] = dates[col].astype(float)

    # Convert % to range 0->1:
    dates["Surprise(%)"] *= 0.01

    # Parse earnings date string
    when = "Earnings Date"
    # - capture the trailing AM/PM + timezone suffix, then strip it
    suffix = dates[when].str.extract('([AP]M[a-zA-Z]*)$')
    dates[when] = dates[when].replace(' [AP]M[a-zA-Z]*$', '', regex=True)
    # - split AM/PM from timezone
    suffix = suffix[0].str.extract('([AP]M)([a-zA-Z]*)', expand=True)
    suffix.columns = ["AM/PM", "TZ"]
    # - re-attach AM/PM only and parse
    dates[when] = dates[when] + ' ' + suffix["AM/PM"]
    dates[when] = _pd.to_datetime(dates[when], format="%b %d, %Y, %I %p")
    # - the timezone abbreviation is ambiguous, so localize with the
    #   exchange timezone from 'info' instead of decoding it
    dates[when] = dates[when].dt.tz_localize(
        tz=self.info["exchangeTimezoneName"])

    dates = dates.set_index("Earnings Date")

    self._earnings_dates = dates
    return dates
|
||||
|
||||
def get_earnings_history(self, proxy=None):
    """
    Fetch the earnings table from the earnings calendar page.

    The first HTML table of the calendar page for this ticker is
    parsed, "-" placeholders are replaced by NaN and the
    "EPS Estimate" / "Reported EPS" columns are converted to numbers.
    A successfully parsed table is stored on the instance and returned
    directly on later calls.

    :Parameters:
        proxy: str or dict
            Optional. Proxy URL, or a dict holding it under "https".
            Default is None

    :Returns:
        pandas.DataFrame, or None (after printing a message) when a
        ValueError occurs while reading or converting the table.

    :Raises:
        RuntimeError
            If the response contains "Will be right back".
    """
    # The cache holds a DataFrame, whose truth value is ambiguous
    # (``if df:`` raises ValueError), so compare against None.
    if self._earnings_history is not None:
        return self._earnings_history

    # setup proxy in requests format
    if proxy is not None:
        if isinstance(proxy, dict) and "https" in proxy:
            proxy = proxy["https"]
        proxy = {"https": proxy}

    url = "{}/calendar/earnings?symbol={}".format(_ROOT_URL_, self.ticker)
    session = self.session or _requests
    data = session.get(
        url=url,
        proxies=proxy,
        headers=utils.user_agent_headers
    ).text

    if "Will be right back" in data:
        raise RuntimeError("*** YAHOO! FINANCE IS CURRENTLY DOWN! ***\n"
                           "Our engineers are working quickly to resolve "
                           "the issue. Thank you for your patience.")

    try:
        # read_html returns a list of pandas Dataframes of all the tables in `data`
        data = _pd.read_html(data)[0]
        data.replace("-", _np.nan, inplace=True)

        data['EPS Estimate'] = _pd.to_numeric(data['EPS Estimate'])
        data['Reported EPS'] = _pd.to_numeric(data['Reported EPS'])
        self._earnings_history = data
    # if no tables are found a ValueError is thrown
    except ValueError:
        print("Could not find data for {}.".format(self.ticker))
        return None
    return data
|
||||
|
||||
@@ -30,7 +30,7 @@ from . import shared
|
||||
|
||||
|
||||
def download(tickers, start=None, end=None, actions=False, threads=True,
|
||||
group_by='column', auto_adjust=False, back_adjust=False,
|
||||
group_by='column', auto_adjust=False, back_adjust=False, keepna=False,
|
||||
progress=True, period="max", show_errors=True, interval="1d", prepost=False,
|
||||
proxy=None, rounding=False, timeout=None, **kwargs):
|
||||
"""Download yahoo tickers
|
||||
@@ -56,6 +56,9 @@ def download(tickers, start=None, end=None, actions=False, threads=True,
|
||||
Default is False
|
||||
auto_adjust: bool
|
||||
Adjust all OHLC automatically? Default is False
|
||||
keepna: bool
|
||||
Keep NaN rows returned by Yahoo?
|
||||
Default is False
|
||||
actions: bool
|
||||
Download dividend + stock splits data. Default is False
|
||||
threads: bool / int
|
||||
@@ -65,7 +68,7 @@ def download(tickers, start=None, end=None, actions=False, threads=True,
|
||||
rounding: bool
|
||||
Optional. Round values to 2 decimal places?
|
||||
show_errors: bool
|
||||
Optional. Doesn't print errors if True
|
||||
Optional. Doesn't print errors if False
|
||||
timeout: None or float
|
||||
If not None stops waiting for a response after given number of
|
||||
seconds. (Can also be a fraction of a second e.g. 0.01)
|
||||
@@ -105,7 +108,7 @@ def download(tickers, start=None, end=None, actions=False, threads=True,
|
||||
_download_one_threaded(ticker, period=period, interval=interval,
|
||||
start=start, end=end, prepost=prepost,
|
||||
actions=actions, auto_adjust=auto_adjust,
|
||||
back_adjust=back_adjust,
|
||||
back_adjust=back_adjust, keepna=keepna,
|
||||
progress=(progress and i > 0), proxy=proxy,
|
||||
rounding=rounding, timeout=timeout)
|
||||
while len(shared._DFS) < len(tickers):
|
||||
@@ -117,7 +120,7 @@ def download(tickers, start=None, end=None, actions=False, threads=True,
|
||||
data = _download_one(ticker, period=period, interval=interval,
|
||||
start=start, end=end, prepost=prepost,
|
||||
actions=actions, auto_adjust=auto_adjust,
|
||||
back_adjust=back_adjust, proxy=proxy,
|
||||
back_adjust=back_adjust, keepna=keepna, proxy=proxy,
|
||||
rounding=rounding, timeout=timeout)
|
||||
shared._DFS[ticker.upper()] = data
|
||||
if progress:
|
||||
@@ -183,11 +186,11 @@ def _download_one_threaded(ticker, start=None, end=None,
|
||||
auto_adjust=False, back_adjust=False,
|
||||
actions=False, progress=True, period="max",
|
||||
interval="1d", prepost=False, proxy=None,
|
||||
rounding=False, timeout=None):
|
||||
keepna=False, rounding=False, timeout=None):
|
||||
|
||||
data = _download_one(ticker, start, end, auto_adjust, back_adjust,
|
||||
actions, period, interval, prepost, proxy, rounding,
|
||||
timeout)
|
||||
keepna, timeout)
|
||||
shared._DFS[ticker.upper()] = data
|
||||
if progress:
|
||||
shared._PROGRESS_BAR.animate()
|
||||
@@ -197,11 +200,11 @@ def _download_one(ticker, start=None, end=None,
|
||||
auto_adjust=False, back_adjust=False,
|
||||
actions=False, period="max", interval="1d",
|
||||
prepost=False, proxy=None, rounding=False,
|
||||
timeout=None):
|
||||
keepna=False, timeout=None):
|
||||
|
||||
return Ticker(ticker).history(period=period, interval=interval,
|
||||
start=start, end=end, prepost=prepost,
|
||||
actions=actions, auto_adjust=auto_adjust,
|
||||
back_adjust=back_adjust, proxy=proxy,
|
||||
rounding=rounding, many=True,
|
||||
rounding=rounding, keepna=keepna, many=True,
|
||||
timeout=timeout)
|
||||
|
||||
@@ -211,3 +211,11 @@ class Ticker(TickerBase):
|
||||
@property
|
||||
def analysis(self):
|
||||
return self.get_analysis()
|
||||
|
||||
@property
|
||||
def earnings_history(self):
|
||||
return self.get_earnings_history()
|
||||
|
||||
@property
|
||||
def earnings_dates(self):
|
||||
return self.get_earnings_dates()
|
||||
|
||||
@@ -30,14 +30,14 @@ class Tickers():
|
||||
def __repr__(self):
|
||||
return 'yfinance.Tickers object <%s>' % ",".join(self.symbols)
|
||||
|
||||
def __init__(self, tickers):
|
||||
def __init__(self, tickers, session=None):
|
||||
tickers = tickers if isinstance(
|
||||
tickers, list) else tickers.replace(',', ' ').split()
|
||||
self.symbols = [ticker.upper() for ticker in tickers]
|
||||
ticker_objects = {}
|
||||
|
||||
for ticker in self.symbols:
|
||||
ticker_objects[ticker] = Ticker(ticker)
|
||||
ticker_objects[ticker] = Ticker(ticker, session=session)
|
||||
|
||||
self.tickers = ticker_objects
|
||||
# self.tickers = _namedtuple(
|
||||
@@ -51,11 +51,11 @@ class Tickers():
|
||||
timeout=None, **kwargs):
|
||||
|
||||
return self.download(
|
||||
period, interval,
|
||||
start, end, prepost,
|
||||
actions, auto_adjust, proxy,
|
||||
threads, group_by, progress,
|
||||
timeout, **kwargs)
|
||||
period, interval,
|
||||
start, end, prepost,
|
||||
actions, auto_adjust, proxy,
|
||||
threads, group_by, progress,
|
||||
timeout, **kwargs)
|
||||
|
||||
def download(self, period="1mo", interval="1d",
|
||||
start=None, end=None, prepost=False,
|
||||
@@ -85,3 +85,12 @@ class Tickers():
|
||||
data.sort_index(level=0, axis=1, inplace=True)
|
||||
|
||||
return data
|
||||
|
||||
def news(self):
    """
    Collect the news items of every ticker in this group.

    :Returns:
        dict mapping each symbol in ``self.symbols`` to a new list
        holding that ticker's news items.
    """
    # Reuse the Ticker objects created in __init__ (stored in
    # self.tickers under the same upper-cased symbols) rather than
    # building throwaway ones, so the session given to this object
    # is used for the requests as well.
    return {symbol: list(self.tickers[symbol].news)
            for symbol in self.symbols}
|
||||
|
||||
@@ -21,11 +21,15 @@
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import datetime as _datetime
|
||||
import pytz as _tz
|
||||
import requests as _requests
|
||||
import re as _re
|
||||
import pandas as _pd
|
||||
import numpy as _np
|
||||
import sys as _sys
|
||||
import os as _os
|
||||
import appdirs as _ad
|
||||
|
||||
try:
|
||||
import ujson as _json
|
||||
@@ -33,7 +37,8 @@ except ImportError:
|
||||
import json as _json
|
||||
|
||||
|
||||
user_agent_headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
|
||||
user_agent_headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
|
||||
|
||||
|
||||
def is_isin(string):
|
||||
@@ -88,6 +93,13 @@ def empty_df(index=[]):
|
||||
return empty
|
||||
|
||||
|
||||
def empty_earnings_dates_df():
    """Build a row-less DataFrame that carries the earnings-dates columns."""
    column_names = ["Symbol", "Company", "Earnings Date",
                    "EPS Estimate", "Reported EPS", "Surprise(%)"]
    return _pd.DataFrame(columns=column_names)
|
||||
|
||||
|
||||
def get_html(url, proxy=None, session=None):
|
||||
session = session or _requests
|
||||
html = session.get(url=url, proxies=proxy, headers=user_agent_headers).text
|
||||
@@ -109,8 +121,9 @@ def get_json(url, proxy=None, session=None):
|
||||
'context']['dispatcher']['stores']['QuoteSummaryStore']
|
||||
# add data about Shares Outstanding for companies' tickers if they are available
|
||||
try:
|
||||
data['annualBasicAverageShares'] = _json.loads(json_str)[
|
||||
'context']['dispatcher']['stores']['QuoteTimeSeriesStore']['timeSeries']['annualBasicAverageShares']
|
||||
data['annualBasicAverageShares'] = _json.loads(
|
||||
json_str)['context']['dispatcher']['stores'][
|
||||
'QuoteTimeSeriesStore']['timeSeries']['annualBasicAverageShares']
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -126,6 +139,23 @@ def camel2title(o):
|
||||
return [_re.sub("([a-z])([A-Z])", r"\g<1> \g<2>", i).title() for i in o]
|
||||
|
||||
|
||||
def _parse_user_dt(dt, exchange_tz):
    """Convert a user-supplied date/time value into an integer epoch timestamp.

    Args:
        dt: one of
            - an epoch in seconds (int or float), returned truncated to int;
            - a 'YYYY-MM-DD' string;
            - a ``datetime.date`` (taken as midnight of that day);
            - a ``datetime.datetime``.
        exchange_tz: timezone name used to localize values that carry no
            tzinfo; timezone-aware datetimes are left in their own zone.

    Returns:
        int: seconds since the Unix epoch.

    Raises:
        ValueError: if ``dt`` is a string that does not match '%Y-%m-%d'.
        OverflowError, OSError, ValueError: if a numeric ``dt`` is not a
            timestamp that ``datetime.fromtimestamp`` can represent.
    """
    if isinstance(dt, (int, float)):
        # Should already be epoch, test with conversion:
        _datetime.datetime.fromtimestamp(dt)
        # Float epochs used to fall through and crash on dt.timestamp().
        return int(dt)

    # Convert str/date -> datetime, set tzinfo=exchange, get timestamp:
    if isinstance(dt, str):
        dt = _datetime.datetime.strptime(dt, '%Y-%m-%d')
    if isinstance(dt, _datetime.date) and not isinstance(dt, _datetime.datetime):
        dt = _datetime.datetime.combine(dt, _datetime.time(0))
    if isinstance(dt, _datetime.datetime) and dt.tzinfo is None:
        # Assume user is referring to exchange's timezone
        dt = _tz.timezone(exchange_tz).localize(dt)
    return int(dt.timestamp())
|
||||
|
||||
|
||||
def auto_adjust(data):
|
||||
df = data.copy()
|
||||
ratio = df["Close"] / df["Adj Close"]
|
||||
@@ -167,7 +197,7 @@ def back_adjust(data):
|
||||
return df[["Open", "High", "Low", "Close", "Volume"]]
|
||||
|
||||
|
||||
def parse_quotes(data, tz=None):
|
||||
def parse_quotes(data):
|
||||
timestamps = data["timestamp"]
|
||||
ohlc = data["indicators"]["quote"][0]
|
||||
volumes = ohlc["volume"]
|
||||
@@ -190,15 +220,14 @@ def parse_quotes(data, tz=None):
|
||||
quotes.index = _pd.to_datetime(timestamps, unit="s")
|
||||
quotes.sort_index(inplace=True)
|
||||
|
||||
if tz is not None:
|
||||
quotes.index = quotes.index.tz_localize(tz)
|
||||
|
||||
return quotes
|
||||
|
||||
|
||||
def parse_actions(data, tz=None):
|
||||
dividends = _pd.DataFrame(columns=["Dividends"])
|
||||
splits = _pd.DataFrame(columns=["Stock Splits"])
|
||||
def parse_actions(data):
|
||||
dividends = _pd.DataFrame(
|
||||
columns=["Dividends"], index=_pd.DatetimeIndex([]))
|
||||
splits = _pd.DataFrame(
|
||||
columns=["Stock Splits"], index=_pd.DatetimeIndex([]))
|
||||
|
||||
if "events" in data:
|
||||
if "dividends" in data["events"]:
|
||||
@@ -207,8 +236,6 @@ def parse_actions(data, tz=None):
|
||||
dividends.set_index("date", inplace=True)
|
||||
dividends.index = _pd.to_datetime(dividends.index, unit="s")
|
||||
dividends.sort_index(inplace=True)
|
||||
if tz is not None:
|
||||
dividends.index = dividends.index.tz_localize(tz)
|
||||
|
||||
dividends.columns = ["Dividends"]
|
||||
|
||||
@@ -218,8 +245,6 @@ def parse_actions(data, tz=None):
|
||||
splits.set_index("date", inplace=True)
|
||||
splits.index = _pd.to_datetime(splits.index, unit="s")
|
||||
splits.sort_index(inplace=True)
|
||||
if tz is not None:
|
||||
splits.index = splits.index.tz_localize(tz)
|
||||
splits["Stock Splits"] = splits["numerator"] / \
|
||||
splits["denominator"]
|
||||
splits = splits["Stock Splits"]
|
||||
@@ -227,6 +252,19 @@ def parse_actions(data, tz=None):
|
||||
return dividends, splits
|
||||
|
||||
|
||||
def fix_Yahoo_dst_issue(df, interval):
    """Shift near-midnight timestamps of daily/weekly data forward to 00:00.

    Args:
        df: DataFrame whose index exposes ``hour`` and ``minute``
            (a DatetimeIndex). Its index is reassigned in place.
        interval: interval string; only "1d", "1w" and "1wk" are adjusted.

    Returns:
        The same ``df`` object, with rows stamped 22:00 or 23:00 moved to the
        following midnight when ``interval`` is daily/weekly.
    """
    if interval in ["1d", "1w", "1wk"]:
        # These intervals should start at time 00:00. But for some combinations of date and timezone,
        # Yahoo has time off by few hours (e.g. Brazil 23:00 around Jan-2022). Suspect DST problem.
        # The clue is (a) minutes=0 and (b) hour near 0.
        # Obviously Yahoo meant 00:00, so ensure this doesn't affect date conversion:
        hours = df.index.hour
        f_pre_midnight = (df.index.minute == 0) & (hours.isin([22, 23]))
        dst_error_hours = _np.zeros(df.shape[0], dtype=int)
        dst_error_hours[f_pre_midnight] = 24 - hours[f_pre_midnight]
        # to_timedelta is the supported way to turn numbers plus a unit into
        # timedeltas; the TimedeltaIndex 'unit' argument is deprecated.
        df.index += _pd.to_timedelta(dst_error_hours, unit='h')
    return df
|
||||
|
||||
|
||||
class ProgressBar:
|
||||
def __init__(self, iterations, text='completed'):
|
||||
self.text = text
|
||||
@@ -275,3 +313,37 @@ class ProgressBar:
|
||||
|
||||
def __str__(self):
    """Return ``self.prog_bar`` converted to a string."""
    return str(self.prog_bar)
|
||||
|
||||
|
||||
# Simple file cache of ticker->timezone:
|
||||
def get_cache_dirpath():
    """Return the path of the "py-yfinance" folder inside the user cache dir."""
    base_dir = _ad.user_cache_dir()
    return _os.path.join(base_dir, "py-yfinance")
|
||||
def cache_lookup_tkr_tz(tkr):
    """Look up the cached timezone of ``tkr``.

    Returns the "Tz" value of the first row of "tkr-tz.csv" whose "Ticker"
    equals ``tkr``, or None when the cache file or a matching row is missing.
    """
    cache_file = _os.path.join(get_cache_dirpath(), "tkr-tz.csv")
    if not _os.path.isfile(cache_file):
        return None

    cached = _pd.read_csv(cache_file)
    matches = cached.loc[cached["Ticker"] == tkr, "Tz"]
    if len(matches) == 0:
        return None

    return matches.iloc[0]
|
||||
def cache_store_tkr_tz(tkr, tz):
    """Record the timezone of a ticker in the "tkr-tz.csv" cache file.

    Args:
        tkr: ticker symbol, written to the "Ticker" column.
        tz: timezone value, written to the "Tz" column.

    Raises:
        Exception: if the cache file already contains a row for ``tkr``.
    """
    dp = get_cache_dirpath()
    # exist_ok replaces the former isdir-then-makedirs sequence, which could
    # raise FileExistsError if another caller created the directory between
    # the check and the creation.
    _os.makedirs(dp, exist_ok=True)
    fp = _os.path.join(dp, "tkr-tz.csv")

    df = _pd.DataFrame({"Ticker": [tkr], "Tz": [tz]})
    if not _os.path.isfile(fp):
        # First entry: the new row becomes the whole cache file.
        df.to_csv(fp, index=False)
        return

    df_all = _pd.read_csv(fp)
    if (df_all["Ticker"] == tkr).any():
        raise Exception("Tkr {} tz already in cache".format(tkr))

    _pd.concat([df_all, df]).to_csv(fp, index=False)
|
||||
|
||||
|
||||
@@ -1 +1 @@
|
||||
version = "0.1.69"
|
||||
version = "0.1.75"
|
||||
|
||||
Reference in New Issue
Block a user