Compare commits

..

10 Commits

Author SHA1 Message Date
ValueRaider
66af3080dd Bump version to 0.1.85 2022-11-03 19:04:45 +00:00
ValueRaider
9d396b9559 Merge pull request #1135 from ranaroussi/patch/unknown-ticker-timezone
Backport ticker tz verification for nice error
2022-11-02 15:18:26 +00:00
ValueRaider
23b6ad12c1 Backport ticker tz verification for nice error 2022-10-31 21:14:50 +00:00
ValueRaider
22131e9fc7 Merge pull request #1124 from Jossan84/main
Bugfix: Get logo url when no website exists
2022-10-27 22:34:18 +01:00
ValueRaider
e99e61f95a Bump version to 0.1.84 2022-10-26 00:12:29 +01:00
ValueRaider
a3fe95ea27 Make tz-cache thread-safe 2022-10-26 00:09:23 +01:00
ValueRaider
000cb70bcb Bump version to 0.1.83 2022-10-25 23:23:32 +01:00
ValueRaider
c8d9d06e75 Expose _fetch_ticker_tz() arguments 2022-10-25 23:21:56 +01:00
ValueRaider
a5e07a0375 Bump version to 0.1.82 2022-10-25 23:15:48 +01:00
ValueRaider
a0a12bcf4c Backport _fetch_ticker_tz() 2022-10-25 23:07:48 +01:00
26 changed files with 830 additions and 3608 deletions

View File

@@ -7,36 +7,14 @@ assignees: ''
---
# READ BEFORE POSTING
*** READ BEFORE POSTING ***
### Are you up-to-date?
Upgrade to the latest version and confirm the issue/bug is still there.
Before posting an issue - please upgrade to the latest version and confirm the issue/bug is still there.
Upgrade using:
`$ pip install yfinance --upgrade --no-cache-dir`
Confirm by running:
Bug still there? Delete this content and submit your bug report here and provide the following, as best you can:
`import yfinance as yf ; print(yf.__version__)`
and comparing against [PIP](https://pypi.org/project/yfinance/#history).
### Does Yahoo actually have the data?
Visit `finance.yahoo.com` and confirm they have your data. Maybe your ticker was delisted.
Then check that you are spelling the ticker *exactly* the same as Yahoo.
### Are you spamming Yahoo?
Yahoo Finance's free service has a limit on query rate (roughly 100/s). Yahoo delaying or blocking your spam is not a bug.
### Still think it's a bug?
Delete this default message and submit your bug report here, providing the following as best you can:
- Info about your system:
- yfinance version
- operating system
- Simple code that reproduces your problem
- The error message

View File

@@ -1,14 +0,0 @@
---
name: Feature request
about: Request a new feature
title: ''
labels: ''
assignees: ''
---
**Describe the problem**
**Describe the solution**
**Additional context**

7
.gitignore vendored
View File

@@ -9,10 +9,3 @@ build/
*.html
*.css
*.png
# Environments
.env
.venv
env/
venv/
ENV/

View File

@@ -1,41 +1,18 @@
Change Log
===========
0.2.0rc5
--------
- Improve financials error handling #1243
- Fix '100x price' repair #1244
0.1.85
------
- Fix info['logo_url'] #1062
- Fix handling delisted ticker #1137
0.2.0rc4
--------
- Access to old financials tables via `get_income_stmt(legacy=True)`
- Optimise scraping financials & fundamentals, 2x faster
- Add 'capital gains' alongside dividends & splits for ETFs, and metadata available via `history_metadata`, plus a bunch of price fixes
For full list of changes see #1238
0.1.84
------
- Make tz-cache thread-safe
0.2.0rc2
--------
Financials
- fix financials tables to match website #1128 #1157
- lru_cache to optimise web requests #1147
Prices
- improve price repair #1148
- fix merging dividends/splits with day/week/monthly prices #1161
- fix the Yahoo DST fixes #1143
- improve bad/delisted ticker handling #1140
Misc
- fix 'trailingPegRatio' #1138
- improve error handling #1118
0.2.0rc1
--------
Jumping to 0.2 for this big update. 0.1.* will continue to receive bug-fixes
- timezone cache performance massively improved. Thanks @fredrik-corneliusson #1113 #1112 #1109 #1105 #1099
- price repair feature #1110
- fix merging of dividends/splits with prices #1069 #1086 #1102
- fix Yahoo returning latest price interval across 2 rows #1070
- optional: raise errors as exceptions: raise_errors=True #1104
- add proper unit tests #1069
0.1.83
------
- Reduce spam-effect of tz-fetch
0.1.81
------

View File

@@ -59,10 +59,7 @@ msft.info
# get historical market data
hist = msft.history(period="max")
# show meta information about the history (requires history() to be called first)
msft.history_metadata
# show actions (dividends, splits, capital gains)
# show actions (dividends, splits)
msft.actions
# show dividends
@@ -71,24 +68,9 @@ msft.dividends
# show splits
msft.splits
# show capital gains (for mutual funds & etfs)
msft.capital_gains
# show share count
msft.shares
# show financials:
# - income statement
msft.income_stmt
msft.quarterly_income_stmt
# - balance sheet
msft.balance_sheet
msft.quarterly_balance_sheet
# - cash flow statement
msft.cashflow
msft.quarterly_cashflow
# see `Ticker.get_income_stmt()` for more options
# show financials
msft.financials
msft.quarterly_financials
# show major holders
msft.major_holders
@@ -96,8 +78,13 @@ msft.major_holders
# show institutional holders
msft.institutional_holders
# show mutualfund holders
msft.mutualfund_holders
# show balance sheet
msft.balance_sheet
msft.quarterly_balance_sheet
# show cashflow
msft.cashflow
msft.quarterly_cashflow
# show earnings
msft.earnings
@@ -108,18 +95,11 @@ msft.sustainability
# show analysts recommendations
msft.recommendations
msft.recommendations_summary
# show analysts other work
msft.analyst_price_target
msft.revenue_forecasts
msft.earnings_forecasts
msft.earnings_trend
# show next event (earnings, etc)
msft.calendar
# Show future and historic earnings dates, returns at most next 4 quarters and last 8 quarters by default.
# Note: If more are needed use msft.get_earnings_dates(limit=XX) with increased limit argument.
# show all earnings dates
msft.earnings_dates
# show ISIN code - *experimental*
@@ -148,7 +128,6 @@ msft.history(..., proxy="PROXY_SERVER")
msft.get_actions(proxy="PROXY_SERVER")
msft.get_dividends(proxy="PROXY_SERVER")
msft.get_splits(proxy="PROXY_SERVER")
msft.get_capital_gains(proxy="PROXY_SERVER")
msft.get_balance_sheet(proxy="PROXY_SERVER")
msft.get_cashflow(proxy="PROXY_SERVER")
msft.option_chain(..., proxy="PROXY_SERVER")
@@ -163,7 +142,7 @@ the Ticker constructor.
import requests_cache
session = requests_cache.CachedSession('yfinance.cache')
session.headers['User-agent'] = 'my-program/1.0'
ticker = yf.Ticker('msft', session=session)
ticker = yf.Ticker('msft aapl goog', session=session)
# The scraped response will be stored in the cache
ticker.actions
```
@@ -174,11 +153,12 @@ To initialize multiple `Ticker` objects, use
import yfinance as yf
tickers = yf.Tickers('msft aapl goog')
# ^ returns a named tuple of Ticker objects
# access each ticker using (example)
tickers.tickers['MSFT'].info
tickers.tickers['AAPL'].history(period="1mo")
tickers.tickers['GOOG'].actions
tickers.tickers.MSFT.info
tickers.tickers.AAPL.history(period="1mo")
tickers.tickers.GOOG.actions
```
### Fetching data for multiple tickers
@@ -203,7 +183,7 @@ data = yf.download( # or pdr.get_data_yahoo(...
# fetch data by interval (including intraday if period < 60 days)
# valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
# (optional, default is '1d')
interval = "5d",
interval = "1m",
# Whether to ignore timezone when aligning ticker data from
# different timezones. Default is True. False may be useful for
@@ -218,9 +198,6 @@ data = yf.download( # or pdr.get_data_yahoo(...
# (optional, default is False)
auto_adjust = True,
# identify and attempt repair of currency unit mixups e.g. $/cents
repair = False,
# download pre/post regular market hours data
# (optional, default is False)
prepost = True,
@@ -297,15 +274,12 @@ To install `yfinance` using `conda`, see
### Requirements
- [Python](https://www.python.org) \>= 2.7, 3.4+
- [Pandas](https://github.com/pydata/pandas) \>= 1.3.0
- [Numpy](http://www.numpy.org) \>= 1.16.5
- [requests](http://docs.python-requests.org/en/master) \>= 2.26
- [lxml](https://pypi.org/project/lxml) \>= 4.9.1
- [appdirs](https://pypi.org/project/appdirs) \>= 1.4.4
- [pytz](https://pypi.org/project/pytz) \>=2022.5
- [frozendict](https://pypi.org/project/frozendict) \>= 2.3.4
- [beautifulsoup4](https://pypi.org/project/beautifulsoup4) \>= 4.11.1
- [html5lib](https://pypi.org/project/html5lib) \>= 1.1
- [Pandas](https://github.com/pydata/pandas) (tested to work with
\>=0.23.1)
- [Numpy](http://www.numpy.org) \>= 1.11.1
- [requests](http://docs.python-requests.org/en/master/) \>= 2.14.2
- [lxml](https://pypi.org/project/lxml/) \>= 4.5.1
- [appdirs](https://pypi.org/project/appdirs) \>=1.4.4
### Optional (if you want to use `pandas_datareader`)

View File

@@ -1,5 +1,5 @@
{% set name = "yfinance" %}
{% set version = "0.2.0" %}
{% set version = "0.1.58" %}
package:
name: "{{ name|lower }}"
@@ -16,30 +16,22 @@ build:
requirements:
host:
- pandas >=1.3.0
- pandas >=0.24.0
- numpy >=1.16.5
- requests >=2.26
- requests >=2.21
- multitasking >=0.0.7
- lxml >=4.9.1
- appdirs >=1.4.4
- pytz >=2022.5
- frozendict >=2.3.4
- beautifulsoup4 >=4.11.1
- html5lib >=1.1
- lxml >=4.5.1
- appdirs >= 1.4.4
- pip
- python
run:
- pandas >=1.3.0
- pandas >=0.24.0
- numpy >=1.16.5
- requests >=2.26
- requests >=2.21
- multitasking >=0.0.7
- lxml >=4.9.1
- appdirs >=1.4.4
- pytz >=2022.5
- frozendict >=2.3.4
- beautifulsoup4 >=4.11.1
- html5lib >=1.1
- lxml >=4.5.1
- appdirs >= 1.4.4
- python
test:

View File

@@ -1,10 +1,6 @@
pandas>=1.3.0
pandas>=0.24.0
numpy>=1.16.5
requests>=2.26
multitasking>=0.0.7
lxml>=4.9.1
lxml>=4.5.1
appdirs>=1.4.4
pytz>=2022.5
frozendict>=2.3.4
beautifulsoup4>=4.11.1
html5lib>=1.1

View File

@@ -38,8 +38,8 @@ setup(
classifiers=[
'License :: OSI Approved :: Apache Software License',
# 'Development Status :: 3 - Alpha',
'Development Status :: 4 - Beta',
#'Development Status :: 5 - Production/Stable',
# 'Development Status :: 4 - Beta',
'Development Status :: 5 - Production/Stable',
'Operating System :: OS Independent',
@@ -50,20 +50,20 @@ setup(
'Topic :: Software Development :: Libraries',
'Topic :: Software Development :: Libraries :: Python Modules',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
# 'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
],
platforms=['any'],
keywords='pandas, yahoo finance, pandas datareader',
packages=find_packages(exclude=['contrib', 'docs', 'tests', 'examples']),
install_requires=['pandas>=1.3.0', 'numpy>=1.16.5',
install_requires=['pandas>=0.24.0', 'numpy>=1.15',
'requests>=2.26', 'multitasking>=0.0.7',
'lxml>=4.9.1', 'appdirs>=1.4.4', 'pytz>=2022.5',
'frozendict>=2.3.4',
'beautifulsoup4>=4.11.1', 'html5lib>=1.1'],
'lxml>=4.5.1', 'appdirs>=1.4.4'],
entry_points={
'console_scripts': [
'sample=sample:main',

View File

@@ -28,6 +28,9 @@ class TestTicker(unittest.TestCase):
history = ticker.history(period="max")
assert(history.empty is False and history is not None)
histories = yf.download(symbols, period="1yr")
assert(histories.empty is False and histories is not None)
def test_attributes(self):
for ticker in tickers:
ticker.isin
@@ -37,27 +40,23 @@ class TestTicker(unittest.TestCase):
ticker.dividends
ticker.splits
ticker.actions
ticker.shares
ticker.info
ticker.calendar
ticker.recommendations
ticker.earnings
ticker.quarterly_earnings
ticker.income_stmt
ticker.quarterly_income_stmt
ticker.financials
ticker.quarterly_financials
ticker.balance_sheet
ticker.quarterly_balance_sheet
ticker.cashflow
ticker.quarterly_cashflow
ticker.recommendations_summary
ticker.analyst_price_target
ticker.revenue_forecasts
ticker.sustainability
ticker.options
ticker.news
ticker.earnings_trend
ticker.shares
ticker.earnings_history
ticker.earnings_dates
ticker.earnings_forecasts
def test_holders(self):
for ticker in tickers:

View File

@@ -1 +0,0 @@
#!/usr/bin/env python

View File

@@ -1,9 +0,0 @@
# -*- coding: utf-8 -*-
import sys
import os
# Put the directory above this tests package at the front of sys.path so the
# `import yfinance` that follows resolves to the package found there.
_here = os.path.dirname(__file__)
_parent_dp = os.path.abspath(os.path.join(_here, '..'))
_src_dp = _parent_dp
sys.path.insert(0, _src_dp)
import yfinance

View File

@@ -1,499 +0,0 @@
from .context import yfinance as yf
import unittest
import datetime as _dt
import pytz as _tz
import numpy as _np
import pandas as _pd
import requests_cache
class TestPriceHistory(unittest.TestCase):
    """Regression tests for ``Ticker.history()`` and the price-repair helpers.

    All tests share one in-memory ``requests_cache.CachedSession`` that is
    created in ``setUpClass`` and closed in ``tearDownClass``.
    """

    session = None

    @classmethod
    def setUpClass(cls):
        """Create the cached session shared by every test in the class."""
        cls.session = requests_cache.CachedSession(backend='memory')

    @classmethod
    def tearDownClass(cls):
        """Close the shared session if it was created."""
        if cls.session is not None:
            cls.session.close()

    # ------------------------------------------------------------------
    # Private helpers (names deliberately do not start with "test")
    # ------------------------------------------------------------------

    def _now_at_exchange(self, dat):
        """Return the current moment as an aware datetime in *dat*'s timezone.

        The time is taken in UTC and converted, so the result does not depend
        on the timezone of the machine running the tests.
        """
        tz = dat._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
        return _dt.datetime.now(_tz.utc).astimezone(_tz.timezone(tz))

    @staticmethod
    def _has_events(df):
        """Return True if *df* contains at least one dividend or stock split."""
        return ((df["Dividends"] > 0) | (df["Stock Splits"] > 0)).any()

    def _assert_same_index(self, df1, df2, label1, label2):
        """Fail, listing the dates each side lacks, unless both indexes are equal."""
        if df1.index.equals(df2.index):
            return
        missing_from_df1 = df2.index.difference(df1.index)
        missing_from_df2 = df1.index.difference(df2.index)
        self.fail("{} missing these dates: {}\n{} missing these dates: {}".format(
            label1, missing_from_df1, label2, missing_from_df2))

    def _check_index_with_events(self, interval):
        """Check that dividends/splits do not change the price index.

        For the given *interval*, QQQ and GDX must share the same index, and
        each ticker's index must be the same with and without ``actions``.
        """
        tkr1 = "QQQ"
        tkr2 = "GDX"
        start_d = "2014-12-29"
        end_d = "2020-11-29"

        def fetch(tkr, actions):
            return yf.Ticker(tkr, session=self.session).history(
                start=start_d, end=end_d, interval=interval, actions=actions)

        df1 = fetch(tkr1, True)
        df2 = fetch(tkr2, True)
        self.assertTrue(self._has_events(df1))
        self.assertTrue(self._has_events(df2))
        self._assert_same_index(df1, df2, tkr1, tkr2)

        # Test that index same with and without events:
        for tkr in (tkr1, tkr2):
            df1 = fetch(tkr, True)
            df2 = fetch(tkr, False)
            self.assertTrue(self._has_events(df1))
            self._assert_same_index(df1, df2,
                                    "{}-with-events".format(tkr),
                                    "{}-without-events".format(tkr))

    def _assert_columns_close(self, repaired, expected, columns, rtol):
        """Assert every listed column of *repaired* matches *expected* within *rtol*."""
        for c in columns:
            self.assertTrue(
                _np.isclose(repaired[c], expected[c], rtol=rtol).all(),
                msg="Mismatch in column {}\n- df_repaired:\n{}\n- answer:\n{}".format(
                    c, repaired[c], expected[c]))

    def _assert_ratios_1x_or_100x(self, df_bad, df, data_cols):
        """Assert each bad/good price ratio is either ~1x or ~100x."""
        ratio = df_bad[data_cols].values / df[data_cols].values
        ratio = ratio.round(2)
        # - round near-100 ratio to 100:
        f = ratio > 90
        ratio[f] = (ratio[f] / 10).round().astype(int) * 10  # round ratio to nearest 10
        # - now test
        f_100 = ratio == 100
        f_1 = ratio == 1
        self.assertTrue((f_100 | f_1).all())

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    def test_daily_index(self):
        """Daily, weekly and monthly rows must all be stamped at 00:00."""
        tkrs = ["BHP.AX", "IMP.JO", "BP.L", "PNL.L", "INTC"]
        intervals = ["1d", "1wk", "1mo"]
        for tkr in tkrs:
            dat = yf.Ticker(tkr, session=self.session)

            for interval in intervals:
                df = dat.history(period="5y", interval=interval)

                f = df.index.time == _dt.time(0)
                self.assertTrue(f.all(), msg="Ticker = {}, interval = {}".format(tkr, interval))

    def test_duplicatingHourly(self):
        """The last two hourly rows must not fall within the same hour."""
        tkrs = ["IMP.JO", "BHG.JO", "SSW.JO", "BP.L", "INTC"]
        for tkr in tkrs:
            dat = yf.Ticker(tkr, session=self.session)
            dt = self._now_at_exchange(dat)

            df = dat.history(start=dt.date() - _dt.timedelta(days=1), interval="1h")

            dt0 = df.index[-2]
            dt1 = df.index[-1]
            self.assertNotEqual(dt0.hour, dt1.hour, msg="Ticker = {}".format(tkr))

    def test_duplicatingDaily(self):
        """The last two daily rows must not carry the same timestamp."""
        tkrs = ["IMP.JO", "BHG.JO", "SSW.JO", "BP.L", "INTC"]
        test_run = False
        for tkr in tkrs:
            dat = yf.Ticker(tkr, session=self.session)
            dt = self._now_at_exchange(dat)
            if dt.time() < _dt.time(17, 0):
                continue
            test_run = True

            df = dat.history(start=dt.date() - _dt.timedelta(days=7), interval="1d")

            dt0 = df.index[-2]
            dt1 = df.index[-1]
            self.assertNotEqual(dt0, dt1, msg="Ticker = {}".format(tkr))

        if not test_run:
            self.skipTest("Skipping test_duplicatingDaily() because only expected to fail just after market close")

    def test_duplicatingWeekly(self):
        """The last two weekly rows must not fall within the same week."""
        tkrs = ['MSFT', 'IWO', 'VFINX', '^GSPC', 'BTC-USD']
        test_run = False
        for tkr in tkrs:
            dat = yf.Ticker(tkr, session=self.session)
            # Use the real current time at the exchange; labelling the host's
            # naive local time with the exchange timezone gives the wrong day
            # whenever the host is in a different timezone.
            dt = self._now_at_exchange(dat)
            if dt.date().weekday() not in [1, 2, 3, 4]:
                continue
            test_run = True

            df = dat.history(start=dt.date() - _dt.timedelta(days=7), interval="1wk")
            dt0 = df.index[-2]
            dt1 = df.index[-1]
            self.assertNotEqual(
                dt0.week, dt1.week,
                msg="Ticker={}: Last two rows within same week:\n{}".format(
                    tkr, df.iloc[df.shape[0] - 2:]))

        if not test_run:
            self.skipTest("Skipping test_duplicatingWeekly() because not possible to fail Monday/weekend")

    def test_intraDayWithEvents(self):
        """A dividend seen in daily data must still be present in 15m data."""
        # TASE dividend release pre-market, doesn't merge nicely with intra-day data so check still present

        tkr = "ICL.TA"
        # tkr = "ESLT.TA"
        # tkr = "ONE.TA"
        # tkr = "MGDL.TA"
        start_d = _dt.date.today() - _dt.timedelta(days=60)
        end_d = None
        df_daily = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="1d", actions=True)
        df_daily_divs = df_daily["Dividends"][df_daily["Dividends"] != 0]
        if df_daily_divs.shape[0] == 0:
            self.skipTest("Skipping test_intraDayWithEvents() because 'ICL.TA' has no dividend in last 60 days")

        last_div_date = df_daily_divs.index[-1]
        start_d = last_div_date.date()
        end_d = last_div_date.date() + _dt.timedelta(days=1)
        df = yf.Ticker(tkr, session=self.session).history(start=start_d, end=end_d, interval="15m", actions=True)
        self.assertTrue((df["Dividends"] != 0.0).any())

    def test_dailyWithEvents(self):
        """Daily index must be unaffected by dividend/split events."""
        # Reproduce issue #521
        self._check_index_with_events("1d")

    def test_weeklyWithEvents(self):
        """Weekly index must be unaffected by dividend/split events."""
        # Reproduce issue #521
        self._check_index_with_events("1wk")

    def test_monthlyWithEvents(self):
        """Monthly index must be unaffected by dividend/split events."""
        self._check_index_with_events("1mo")

    def test_tz_dst_ambiguous(self):
        """Fetching prices across an ambiguous DST date must not raise."""
        # Reproduce issue #1100
        try:
            yf.Ticker("ESLT.TA", session=self.session).history(start="2002-10-06", end="2002-10-09", interval="1d")
        except _tz.exceptions.AmbiguousTimeError:
            # Report as a test failure rather than an unrelated generic error.
            self.fail("Ambiguous DST issue not resolved")

    def test_dst_fix(self):
        """Daily rows must be weekdays and weekly rows must start on Monday."""
        # Daily intervals should start at time 00:00. But for some combinations of date and timezone,
        # Yahoo has time off by few hours (e.g. Brazil 23:00 around Jan-2022). Suspect DST problem.
        # The clue is (a) minutes=0 and (b) hour near 0.
        # Obviously Yahoo meant 00:00, so ensure this doesn't affect date conversion.

        # The correction is successful if no days are weekend, and weekly data begins Monday

        tkr = "AGRO3.SA"
        dat = yf.Ticker(tkr, session=self.session)
        start = "2021-01-11"
        end = "2022-11-05"

        interval = "1d"
        df = dat.history(start=start, end=end, interval=interval)
        self.assertTrue(((df.index.weekday >= 0) & (df.index.weekday <= 4)).all())

        interval = "1wk"
        df = dat.history(start=start, end=end, interval=interval)
        self.assertTrue((df.index.weekday == 0).all(), msg="Weekly data not aligned to Monday")

    def test_weekly_2rows_fix(self):
        """Recent weekly rows must all start on Monday."""
        tkr = "AMZN"
        start = _dt.date.today() - _dt.timedelta(days=14)
        start -= _dt.timedelta(days=start.weekday())

        dat = yf.Ticker(tkr, session=self.session)
        df = dat.history(start=start, interval="1wk")
        self.assertTrue((df.index.weekday == 0).all())

    def test_repair_100x_weekly(self):
        """``_fix_unit_mixups`` must undo injected 100x errors in weekly data."""
        # Setup:
        tkr = "PNL.L"
        dat = yf.Ticker(tkr, session=self.session)
        tz_exchange = dat.info["exchangeTimezoneName"]

        data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
        df = _pd.DataFrame(data={"Open": [470.5, 473.5, 474.5, 470],
                                 "High": [476, 476.5, 477, 480],
                                 "Low": [470.5, 470, 465.5, 468.26],
                                 "Close": [475, 473.5, 472, 473.5],
                                 "Adj Close": [475, 473.5, 472, 473.5],
                                 "Volume": [2295613, 2245604, 3000287, 2635611]},
                           index=_pd.to_datetime([_dt.date(2022, 10, 23),
                                                  _dt.date(2022, 10, 16),
                                                  _dt.date(2022, 10, 9),
                                                  _dt.date(2022, 10, 2)]))
        df = df.sort_index()
        df.index.name = "Date"
        df_bad = df.copy()
        df_bad.loc["2022-10-23", "Close"] *= 100
        df_bad.loc["2022-10-16", "Low"] *= 100
        df_bad.loc["2022-10-2", "Open"] *= 100
        df.index = df.index.tz_localize(tz_exchange)
        df_bad.index = df_bad.index.tz_localize(tz_exchange)

        # Run test

        df_repaired = dat._fix_unit_mixups(df_bad, "1wk", tz_exchange)

        # First test - no errors left
        self._assert_columns_close(df_repaired, df, data_cols, rtol=1e-2)

        # Second test - all differences should be either ~1x or ~100x
        self._assert_ratios_1x_or_100x(df_bad, df, data_cols)

    def test_repair_100x_weekly_preSplit(self):
        """As ``test_repair_100x_weekly`` but on data lacking split adjustment."""
        # PNL.L has a stock-split in 2022. Sometimes requesting data before 2022 is not split-adjusted.

        tkr = "PNL.L"
        dat = yf.Ticker(tkr, session=self.session)
        tz_exchange = dat.info["exchangeTimezoneName"]

        data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
        df = _pd.DataFrame(data={"Open": [400, 398, 392.5, 417],
                                 "High": [421, 425, 419, 420.5],
                                 "Low": [400, 380.5, 376.5, 396],
                                 "Close": [410, 409.5, 402, 399],
                                 "Adj Close": [398.02, 397.53, 390.25, 387.34],
                                 "Volume": [3232600, 3773900, 10835000, 4257900]},
                           index=_pd.to_datetime([_dt.date(2020, 3, 30),
                                                  _dt.date(2020, 3, 23),
                                                  _dt.date(2020, 3, 16),
                                                  _dt.date(2020, 3, 9)]))
        df = df.sort_index()
        # Simulate data missing split-adjustment:
        df[data_cols] *= 100.0
        df["Volume"] *= 0.01
        #
        df.index.name = "Date"
        # Create 100x errors:
        df_bad = df.copy()
        df_bad.loc["2020-03-30", "Close"] *= 100
        df_bad.loc["2020-03-23", "Low"] *= 100
        df_bad.loc["2020-03-09", "Open"] *= 100
        df.index = df.index.tz_localize(tz_exchange)
        df_bad.index = df_bad.index.tz_localize(tz_exchange)

        df_repaired = dat._fix_unit_mixups(df_bad, "1wk", tz_exchange)

        # First test - no errors left
        self._assert_columns_close(df_repaired, df, data_cols, rtol=1e-2)

        # Second test - all differences should be either ~1x or ~100x
        self._assert_ratios_1x_or_100x(df_bad, df, data_cols)

    def test_repair_100x_daily(self):
        """``_fix_unit_mixups`` must undo injected 100x errors in daily data."""
        tkr = "PNL.L"
        dat = yf.Ticker(tkr, session=self.session)
        tz_exchange = dat.info["exchangeTimezoneName"]

        data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
        df = _pd.DataFrame(data={"Open": [478, 476, 476, 472],
                                 "High": [478, 477.5, 477, 475],
                                 "Low": [474.02, 474, 473, 470.75],
                                 "Close": [475.5, 475.5, 474.5, 475],
                                 "Adj Close": [475.5, 475.5, 474.5, 475],
                                 "Volume": [436414, 485947, 358067, 287620]},
                           index=_pd.to_datetime([_dt.date(2022, 11, 1),
                                                  _dt.date(2022, 10, 31),
                                                  _dt.date(2022, 10, 28),
                                                  _dt.date(2022, 10, 27)]))
        df = df.sort_index()
        df.index.name = "Date"
        df_bad = df.copy()
        df_bad.loc["2022-11-01", "Close"] *= 100
        df_bad.loc["2022-10-31", "Low"] *= 100
        df_bad.loc["2022-10-27", "Open"] *= 100
        df.index = df.index.tz_localize(tz_exchange)
        df_bad.index = df_bad.index.tz_localize(tz_exchange)

        df_repaired = dat._fix_unit_mixups(df_bad, "1d", tz_exchange)

        # First test - no errors left
        self._assert_columns_close(df_repaired, df, data_cols, rtol=1e-2)

        # Second test - all differences should be either ~1x or ~100x
        self._assert_ratios_1x_or_100x(df_bad, df, data_cols)

    def test_repair_zeroes_daily(self):
        """``_fix_zeroes`` must fill zeroed daily prices with the expected values."""
        tkr = "BBIL.L"
        dat = yf.Ticker(tkr, session=self.session)
        tz_exchange = dat.info["exchangeTimezoneName"]

        df_bad = _pd.DataFrame(data={"Open": [0, 102.04, 102.04],
                                     "High": [0, 102.1, 102.11],
                                     "Low": [0, 102.04, 102.04],
                                     "Close": [103.03, 102.05, 102.08],
                                     "Adj Close": [102.03, 102.05, 102.08],
                                     "Volume": [560, 137, 117]},
                               index=_pd.to_datetime([_dt.datetime(2022, 11, 1),
                                                      _dt.datetime(2022, 10, 31),
                                                      _dt.datetime(2022, 10, 30)]))
        df_bad = df_bad.sort_index()
        df_bad.index.name = "Date"
        df_bad.index = df_bad.index.tz_localize(tz_exchange)

        repaired_df = dat._fix_zeroes(df_bad, "1d", tz_exchange)

        correct_df = df_bad.copy()
        correct_df.loc["2022-11-01", "Open"] = 102.080002
        correct_df.loc["2022-11-01", "Low"] = 102.032501
        correct_df.loc["2022-11-01", "High"] = 102.080002
        self._assert_columns_close(repaired_df, correct_df,
                                   ["Open", "Low", "High", "Close"], rtol=1e-8)

    def test_repair_zeroes_hourly(self):
        """``_fix_zeroes`` must fill a missing hourly row with the expected values."""
        tkr = "INTC"
        dat = yf.Ticker(tkr, session=self.session)
        tz_exchange = dat.info["exchangeTimezoneName"]

        df_bad = _pd.DataFrame(data={"Open": [29.68, 29.49, 29.545, _np.nan, 29.485],
                                     "High": [29.68, 29.625, 29.58, _np.nan, 29.49],
                                     "Low": [29.46, 29.4, 29.45, _np.nan, 29.31],
                                     "Close": [29.485, 29.545, 29.485, _np.nan, 29.325],
                                     "Adj Close": [29.485, 29.545, 29.485, _np.nan, 29.325],
                                     "Volume": [3258528, 2140195, 1621010, 0, 0]},
                               index=_pd.to_datetime([_dt.datetime(2022, 11, 25, 9, 30),
                                                      _dt.datetime(2022, 11, 25, 10, 30),
                                                      _dt.datetime(2022, 11, 25, 11, 30),
                                                      _dt.datetime(2022, 11, 25, 12, 30),
                                                      _dt.datetime(2022, 11, 25, 13, 00)]))
        df_bad = df_bad.sort_index()
        df_bad.index.name = "Date"
        df_bad.index = df_bad.index.tz_localize(tz_exchange)

        repaired_df = dat._fix_zeroes(df_bad, "1h", tz_exchange)

        correct_df = df_bad.copy()
        idx = _pd.Timestamp(2022, 11, 25, 12, 30).tz_localize(tz_exchange)
        correct_df.loc[idx, "Open"] = 29.485001
        correct_df.loc[idx, "High"] = 29.49
        correct_df.loc[idx, "Low"] = 29.43
        correct_df.loc[idx, "Close"] = 29.455
        correct_df.loc[idx, "Adj Close"] = 29.455
        correct_df.loc[idx, "Volume"] = 609164
        self._assert_columns_close(repaired_df, correct_df,
                                   ["Open", "Low", "High", "Close"], rtol=1e-7)
if __name__ == "__main__":
    # Default entry point: let unittest discover and run every test here.
    unittest.main()

    # Alternative kept for reference (left disabled):
    # # Run tests sequentially:
    # import inspect
    # test_src = inspect.getsource(TestPriceHistory)
    # unittest.TestLoader.sortTestMethodsUsing = lambda _, x, y: (
    #     test_src.index(f"def {x}") - test_src.index(f"def {y}")
    # )
    # unittest.main(verbosity=2)

View File

@@ -1,618 +0,0 @@
"""
Tests for Ticker
To run all tests in suite from commandline:
python -m unittest tests.ticker
Specific test class:
python -m unittest tests.ticker.TestTicker
"""
import pandas as pd
from .context import yfinance as yf
import unittest
import requests_cache
# Switch to True to turn on DEBUG-level logging while the tests run, which
# shows the exact requests that are made.
DEBUG_LOG_REQUESTS = False

if DEBUG_LOG_REQUESTS:
    import logging

    logging.basicConfig(level=logging.DEBUG)
class TestTicker(unittest.TestCase):
    """Smoke tests that exercise ``yf.Ticker`` through one shared cached session."""

    session = None

    # Ticker attributes read by the smoke tests, in the order they are accessed.
    _ATTRIBUTE_NAMES = (
        "isin",
        "major_holders",
        "institutional_holders",
        "mutualfund_holders",
        "dividends",
        "splits",
        "actions",
        "shares",
        "info",
        "calendar",
        "recommendations",
        "earnings",
        "quarterly_earnings",
        "income_stmt",
        "quarterly_income_stmt",
        "balance_sheet",
        "quarterly_balance_sheet",
        "cashflow",
        "quarterly_cashflow",
        "recommendations_summary",
        "analyst_price_target",
        "revenue_forecasts",
        "sustainability",
        "options",
        "news",
        "earnings_trend",
        "earnings_dates",
        "earnings_forecasts",
    )

    @classmethod
    def setUpClass(cls):
        """Open the in-memory cached session used by all tests."""
        cls.session = requests_cache.CachedSession(backend='memory')

    @classmethod
    def tearDownClass(cls):
        """Close the shared session, if there is one."""
        shared = cls.session
        if shared is None:
            return
        shared.close()

    def _read_all_attributes(self, dat):
        """Access each listed attribute once; the values are discarded."""
        for attr_name in self._ATTRIBUTE_NAMES:
            getattr(dat, attr_name)

    def _fetch_price_histories(self, dat, tkr):
        """Request price history three ways, then via ``yf.download``."""
        dat.history(period="1wk")
        dat.history(start="2022-01-01")
        dat.history(start="2022-01-01", end="2022-03-01")
        yf.download([tkr], period="1wk")

    def test_getTz(self):
        """A timezone must be returned after the tz-cache entry is cleared."""
        for symbol in ("IMP.JO", "BHG.JO", "SSW.JO", "BP.L", "INTC"):
            # First step: remove ticker from tz-cache
            yf.utils.get_tz_cache().store(symbol, None)

            # Test:
            ticker = yf.Ticker(symbol, session=self.session)
            zone = ticker._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)

            self.assertIsNotNone(zone)

    def test_badTicker(self):
        """The full API must not raise for a delisted ticker."""
        # Check yfinance doesn't die when ticker delisted
        symbol = "AM2Z.TA"
        ticker = yf.Ticker(symbol, session=self.session)
        self._fetch_price_histories(ticker, symbol)
        self._read_all_attributes(ticker)

    def test_goodTicker(self):
        """The full API must work when called on one Ticker instance."""
        # that yfinance works when full api is called on same instance of ticker
        symbol = "IBM"
        ticker = yf.Ticker(symbol, session=self.session)
        self._read_all_attributes(ticker)
        self._fetch_price_histories(ticker, symbol)
class TestTickerHistory(unittest.TestCase):
    """Tests for price history and corporate actions of a single ticker (IBM)."""

    session = None

    @classmethod
    def setUpClass(cls):
        """Create the cached session shared by every test in the class."""
        cls.session = requests_cache.CachedSession(backend='memory')

    @classmethod
    def tearDownClass(cls):
        """Close the shared session if it was created."""
        if cls.session is not None:
            cls.session.close()

    def setUp(self):
        """Build a fresh Ticker for each test."""
        # use a ticker that has dividends
        self.ticker = yf.Ticker("IBM", session=self.session)

    def tearDown(self):
        """Drop the per-test Ticker."""
        self.ticker = None

    def test_history(self):
        """history() returns a non-empty DataFrame and populates history_metadata."""
        # Metadata is not available until history() has been called.
        with self.assertRaises(RuntimeError):
            self.ticker.history_metadata
        data = self.ticker.history("1y")
        self.assertIn("IBM", self.ticker.history_metadata.values(), "metadata missing")
        self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
        self.assertFalse(data.empty, "data is empty")

    def test_no_expensive_calls_introduced(self):
        """
        Make sure calling history to get price data has not introduced more calls to yahoo than absolutely necessary.
        As doing other type of scraping calls than "query2.finance.yahoo.com/v8/finance/chart" to yahoo website
        will quickly trigger spam-block when doing bulk download of history data.
        """
        # A private session is used so only this test's requests are recorded.
        session = requests_cache.CachedSession(backend='memory')
        try:
            ticker = yf.Ticker("GOOGL", session=session)
            ticker.history("1y")
            actual_urls_called = tuple(r.url for r in session.cache.filter())
        finally:
            # Always release the session, even when the fetch above raises.
            session.close()
        expected_urls = (
            'https://query2.finance.yahoo.com/v8/finance/chart/GOOGL?range=1y&interval=1d&includePrePost=False&events=div%2Csplits%2CcapitalGains',
        )
        self.assertEqual(expected_urls, actual_urls_called, "Different than expected url used to fetch history.")

    def test_dividends(self):
        """dividends is a non-empty Series."""
        data = self.ticker.dividends
        self.assertIsInstance(data, pd.Series, "data has wrong type")
        self.assertFalse(data.empty, "data is empty")

    def test_splits(self):
        """splits is a Series (emptiness is deliberately not checked)."""
        data = self.ticker.splits
        self.assertIsInstance(data, pd.Series, "data has wrong type")
        # self.assertFalse(data.empty, "data is empty")

    def test_actions(self):
        """actions is a non-empty DataFrame."""
        data = self.ticker.actions
        self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
        self.assertFalse(data.empty, "data is empty")
class TestTickerEarnings(unittest.TestCase):
    """Earnings-related accessors: result type, non-emptiness and caching."""

    session = None

    @classmethod
    def setUpClass(cls):
        cls.session = requests_cache.CachedSession(backend='memory')

    @classmethod
    def tearDownClass(cls):
        if cls.session is None:
            return
        cls.session.close()

    def setUp(self):
        self.ticker = yf.Ticker("GOOGL", session=self.session)

    def tearDown(self):
        self.ticker = None

    def _check_cached_frame(self, attribute):
        # Read the attribute twice: the first result must be a non-empty
        # DataFrame and the second read must return the very same object.
        first = getattr(self.ticker, attribute)
        self.assertIsInstance(first, pd.DataFrame, "data has wrong type")
        self.assertFalse(first.empty, "data is empty")

        second = getattr(self.ticker, attribute)
        self.assertIs(first, second, "data not cached")

    def test_earnings(self):
        self._check_cached_frame("earnings")

    def test_quarterly_earnings(self):
        self._check_cached_frame("quarterly_earnings")

    def test_earnings_forecasts(self):
        self._check_cached_frame("earnings_forecasts")

    def test_earnings_dates(self):
        self._check_cached_frame("earnings_dates")

    def test_earnings_trend(self):
        self._check_cached_frame("earnings_trend")

    def test_earnings_dates_with_limit(self):
        # use ticker with lots of historic earnings
        ibm = yf.Ticker("IBM")
        limit = 110

        dates = ibm.get_earnings_dates(limit=limit)
        self.assertIsInstance(dates, pd.DataFrame, "data has wrong type")
        self.assertFalse(dates.empty, "data is empty")
        self.assertEqual(len(dates), limit, "Wrong number or rows")

        dates_again = ibm.get_earnings_dates(limit=limit)
        self.assertIs(dates, dates_again, "data not cached")
class TestTickerHolders(unittest.TestCase):
    """Holder tables: result type, non-emptiness and caching."""

    session = None

    @classmethod
    def setUpClass(cls):
        cls.session = requests_cache.CachedSession(backend='memory')

    @classmethod
    def tearDownClass(cls):
        if cls.session is None:
            return
        cls.session.close()

    def setUp(self):
        self.ticker = yf.Ticker("GOOGL", session=self.session)

    def tearDown(self):
        self.ticker = None

    def _check_cached_frame(self, attribute):
        # Two consecutive reads: a non-empty DataFrame first, then the
        # identical object on the second read.
        first = getattr(self.ticker, attribute)
        self.assertIsInstance(first, pd.DataFrame, "data has wrong type")
        self.assertFalse(first.empty, "data is empty")

        second = getattr(self.ticker, attribute)
        self.assertIs(first, second, "data not cached")

    def test_major_holders(self):
        self._check_cached_frame("major_holders")

    def test_institutional_holders(self):
        self._check_cached_frame("institutional_holders")

    def test_mutualfund_holders(self):
        self._check_cached_frame("mutualfund_holders")
class TestTickerMiscFinancials(unittest.TestCase):
    """Financial statements and miscellaneous scraped data of a ticker.

    The six statement tests (income / balance sheet / cash flow, annual and
    quarterly) share one helper, so every variant runs exactly the same
    checks, including the ``as_dict`` check at the requested frequency.
    """

    session = None

    @classmethod
    def setUpClass(cls):
        cls.session = requests_cache.CachedSession(backend='memory')

    @classmethod
    def tearDownClass(cls):
        if cls.session is not None:
            cls.session.close()

    def setUp(self):
        self.ticker = yf.Ticker("GOOGL", session=self.session)

        # For ticker 'BSE.AX' (and others), Yahoo not returning
        # full quarterly financials (usually cash-flow) with all entries,
        # instead returns a smaller version in different data store.
        self.ticker_old_fmt = yf.Ticker("BSE.AX", session=self.session)

    def tearDown(self):
        self.ticker = None
        self.ticker_old_fmt = None

    # ------------------------------------------------------------------
    # Shared checks
    # ------------------------------------------------------------------
    def _check_statement(self, getter_name, property_name, expected_keys, freq=None):
        # Run the full set of statement checks.
        #   getter_name   -- name of the Ticker method, e.g. "get_income_stmt"
        #   property_name -- name of the matching Ticker property
        #   expected_keys -- pretty row labels that must be present
        #   freq          -- None (getter default, checked as annual) or "quarterly"
        getter = getattr(self.ticker, getter_name)
        # The frequency is only passed when explicitly requested so that the
        # annual variants keep exercising the getter's own default.
        freq_kwargs = {} if freq is None else {"freq": freq}
        if freq == "quarterly":
            expected_period_days = 365 // 4
            period_msg = "Not returning quarterly financials"
        else:
            expected_period_days = 365
            period_msg = "Not returning annual financials"

        # Test contents of table
        data = getter(pretty=True, **freq_kwargs)
        self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
        self.assertFalse(data.empty, "data is empty")
        for key in expected_keys:
            self.assertIn(key, data.index, "Did not find expected row in index")
        # Distance between the first two columns must match the frequency.
        period = abs((data.columns[0] - data.columns[1]).days)
        self.assertLess(abs(period - expected_period_days), 20, period_msg)

        # Test property defaults
        data2 = getattr(self.ticker, property_name)
        self.assertTrue(data.equals(data2), "property not defaulting to 'pretty=True'")

        # Test pretty=False: the same rows are expected without spaces.
        raw_keys = [key.replace(' ', '') for key in expected_keys]
        data = getter(pretty=False, **freq_kwargs)
        self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
        self.assertFalse(data.empty, "data is empty")
        for key in raw_keys:
            self.assertIn(key, data.index, "Did not find expected row in index")

        # Test to_dict, at the same frequency as the checks above.
        data = getter(as_dict=True, **freq_kwargs)
        self.assertIsInstance(data, dict, "data has wrong type")

    def _check_old_fmt_statement(self, getter_name, expected_row):
        # Quarterly legacy statement of the old-format ticker: non-empty
        # DataFrame containing expected_row, and cached between calls.
        getter = getattr(self.ticker_old_fmt, getter_name)
        data = getter(freq="quarterly", legacy=True)
        self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
        self.assertFalse(data.empty, "data is empty")
        self.assertIn(expected_row, data.index, "Did not find expected row in index")

        data_cached = getter(freq="quarterly", legacy=True)
        self.assertIs(data, data_cached, "data not cached")

    def _check_cached_frame(self, attribute):
        # Attribute must yield a non-empty DataFrame and the identical
        # object when read a second time.
        data = getattr(self.ticker, attribute)
        self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
        self.assertFalse(data.empty, "data is empty")

        data_cached = getattr(self.ticker, attribute)
        self.assertIs(data, data_cached, "data not cached")

    # ------------------------------------------------------------------
    # Financial statements
    # ------------------------------------------------------------------
    def test_income_statement(self):
        self._check_statement("get_income_stmt", "income_stmt",
                              ["Total Revenue", "Basic EPS"])

    def test_quarterly_income_statement(self):
        self._check_statement("get_income_stmt", "quarterly_income_stmt",
                              ["Total Revenue", "Basic EPS"], freq="quarterly")

    def test_quarterly_income_statement_old_fmt(self):
        self._check_old_fmt_statement("get_income_stmt", "TotalRevenue")

    def test_balance_sheet(self):
        self._check_statement("get_balance_sheet", "balance_sheet",
                              ["Total Assets", "Net PPE"])

    def test_quarterly_balance_sheet(self):
        self._check_statement("get_balance_sheet", "quarterly_balance_sheet",
                              ["Total Assets", "Net PPE"], freq="quarterly")

    def test_quarterly_balance_sheet_old_fmt(self):
        self._check_old_fmt_statement("get_balance_sheet", "TotalAssets")

    def test_cash_flow(self):
        self._check_statement("get_cashflow", "cashflow",
                              ["Operating Cash Flow", "Net PPE Purchase And Sale"])

    def test_quarterly_cash_flow(self):
        self._check_statement("get_cashflow", "quarterly_cashflow",
                              ["Operating Cash Flow", "Net PPE Purchase And Sale"],
                              freq="quarterly")

    def test_quarterly_cashflow_old_fmt(self):
        self._check_old_fmt_statement("get_cashflow", "NetIncome")

    # ------------------------------------------------------------------
    # Other scraped data
    # ------------------------------------------------------------------
    def test_sustainability(self):
        self._check_cached_frame("sustainability")

    def test_recommendations(self):
        self._check_cached_frame("recommendations")

    def test_recommendations_summary(self):
        self._check_cached_frame("recommendations_summary")

    def test_analyst_price_target(self):
        self._check_cached_frame("analyst_price_target")

    def test_revenue_forecasts(self):
        self._check_cached_frame("revenue_forecasts")

    def test_calendar(self):
        self._check_cached_frame("calendar")

    def test_isin(self):
        data = self.ticker.isin
        self.assertIsInstance(data, str, "data has wrong type")
        self.assertEqual("ARDEUT116159", data, "unexpected ISIN value")

        data_cached = self.ticker.isin
        self.assertIs(data, data_cached, "data not cached")

    def test_options(self):
        data = self.ticker.options
        self.assertIsInstance(data, tuple, "data has wrong type")
        self.assertGreater(len(data), 1, "data is empty")

    def test_shares(self):
        data = self.ticker.shares
        self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
        self.assertFalse(data.empty, "data is empty")

    def test_info(self):
        data = self.ticker.info
        self.assertIsInstance(data, dict, "data has wrong type")
        self.assertIn("symbol", data.keys(), "Did not find expected key in info dict")
        self.assertEqual("GOOGL", data["symbol"], "Wrong symbol value in info dict")

    def test_bad_freq_value_raises_exception(self):
        with self.assertRaises(ValueError):
            self.ticker.get_cashflow(freq="badarg")
def suite():
    """Return a TestSuite holding every test of the ticker test classes.

    The classes are loaded through the default unittest loader. Instantiating
    a TestCase directly requires the name of an existing test method, so a
    free-text label such as 'Test ticker' raises ValueError.
    """
    loader = unittest.defaultTestLoader
    test_cases = (
        TestTicker,
        TestTickerEarnings,
        TestTickerHolders,
        TestTickerHistory,
        TestTickerMiscFinancials,
    )
    all_tests = unittest.TestSuite()
    for test_case in test_cases:
        all_tests.addTest(loader.loadTestsFromTestCase(test_case))
    return all_tests
# Run this module's tests when the file is executed directly as a script.
if __name__ == '__main__':
unittest.main()

File diff suppressed because it is too large Load Diff

View File

@@ -1,102 +0,0 @@
import functools
from functools import lru_cache
import requests as requests
import re
from frozendict import frozendict
try:
import ujson as json
except ImportError:
import json as json
# Value passed as `maxsize` to the lru_cache decorators used in this module.
cache_maxsize = 64
def lru_cache_freezeargs(func):
    """
    Decorator that converts mutable dict and list arguments into immutable types.

    Needed so that lru_cache can cache calls whose arguments include dicts or
    lists: dicts become frozendicts, lists become tuples, and every other
    value is passed through untouched. `func` must already be wrapped by
    lru_cache, because its cache_info / cache_clear helpers are re-exported.
    """

    def _freeze(value):
        if isinstance(value, dict):
            return frozendict(value)
        if isinstance(value, list):
            return tuple(value)
        return value

    @functools.wraps(func)
    def wrapped(*args, **kwargs):
        frozen_args = tuple(_freeze(positional) for positional in args)
        frozen_kwargs = {name: _freeze(value) for name, value in kwargs.items()}
        return func(*frozen_args, **frozen_kwargs)

    # Re-export the lru_cache helpers so they stay reachable through the
    # decorated callable.
    wrapped.cache_info = func.cache_info
    wrapped.cache_clear = func.cache_clear
    return wrapped
# Base URL of the quote pages; get_json_data_stores() appends the ticker
# symbol and, when given, a sub-page name to it.
_SCRAPE_URL_ = 'https://finance.yahoo.com/quote'
class TickerData:
    """
    Have one place to retrieve data from Yahoo API in order to ease caching and speed up operations
    """
    user_agent_headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}

    def __init__(self, ticker: str, session=None):
        self.ticker = ticker
        # Fall back to the plain `requests` module when no session is given.
        self._session = session or requests

    def get(self, url, user_agent_headers=None, params=None, proxy=None, timeout=30):
        """Issue a GET request through the session and return the response."""
        request_proxies = self._get_proxy(proxy)
        request_headers = user_agent_headers or self.user_agent_headers
        return self._session.get(
            url=url,
            params=params,
            proxies=request_proxies,
            timeout=timeout,
            headers=request_headers)

    @lru_cache_freezeargs
    @lru_cache(maxsize=cache_maxsize)
    def cache_get(self, url, user_agent_headers=None, params=None, proxy=None, timeout=30):
        """Same as get(), with the result memoised per argument combination."""
        return self.get(url, user_agent_headers, params, proxy, timeout)

    def _get_proxy(self, proxy):
        # setup proxy in requests format
        if proxy is None:
            return proxy
        if isinstance(proxy, dict) and "https" in proxy:
            proxy = proxy["https"]
        return {"https": proxy}

    @lru_cache_freezeargs
    @lru_cache(maxsize=cache_maxsize)
    def get_json_data_stores(self, sub_page: str = None, proxy=None) -> dict:
        '''
        get_json_data_stores returns a python dictionary of the data stores in yahoo finance web page.
        '''
        if sub_page:
            ticker_url = f"{_SCRAPE_URL_}/{self.ticker}/{sub_page}"
        else:
            ticker_url = f"{_SCRAPE_URL_}/{self.ticker}"

        page_text = self.get(url=ticker_url, proxy=proxy).text

        # The actual json-data for stores is in a javascript assignment in the webpage
        after_assignment = page_text.split('root.App.main =')
        if len(after_assignment) < 2:
            # Fetch failed, probably because Yahoo spam triggered
            return {}
        json_str = after_assignment[1].split('(this)')[0].split(';\n}')[0].strip()

        stores = json.loads(json_str)['context']['dispatcher']['stores']

        # Empty objects become null and {"raw": x, ...} wrappers are
        # collapsed to the bare raw value before parsing again.
        flattened = json.dumps(stores).replace('{}', 'null')
        flattened = re.sub(
            r'{[\'|\"]raw[\'|\"]:(.*?),(.*?)}', r'\1', flattened)

        return json.loads(flattened)

View File

@@ -1,6 +0,0 @@
class YFinanceException(Exception):
    """Base class of the exceptions defined in this module."""
class YFinanceDataException(YFinanceException):
    """YFinanceException subclass, raised e.g. when the FinancialTemplateStore cannot be parsed or holds no keys."""

View File

@@ -29,10 +29,10 @@ from . import Ticker, utils
from . import shared
def download(tickers, start=None, end=None, actions=False, threads=True, ignore_tz=True,
group_by='column', auto_adjust=False, back_adjust=False, repair=False, keepna=False,
def download(tickers, start=None, end=None, actions=False, threads=True, ignore_tz=True,
group_by='column', auto_adjust=False, back_adjust=False, keepna=False,
progress=True, period="max", show_errors=True, interval="1d", prepost=False,
proxy=None, rounding=False, timeout=10):
proxy=None, rounding=False, timeout=None, **kwargs):
"""Download yahoo tickers
:Parameters:
tickers : str, list
@@ -56,9 +56,6 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
Default is False
auto_adjust: bool
Adjust all OHLC automatically? Default is False
repair: bool
Detect currency unit 100x mixups and attempt repair
Default is False
keepna: bool
Keep NaN rows returned by Yahoo?
Default is False
@@ -114,7 +111,7 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
_download_one_threaded(ticker, period=period, interval=interval,
start=start, end=end, prepost=prepost,
actions=actions, auto_adjust=auto_adjust,
back_adjust=back_adjust, repair=repair, keepna=keepna,
back_adjust=back_adjust, keepna=keepna,
progress=(progress and i > 0), proxy=proxy,
rounding=rounding, timeout=timeout)
while len(shared._DFS) < len(tickers):
@@ -126,8 +123,7 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
data = _download_one(ticker, period=period, interval=interval,
start=start, end=end, prepost=prepost,
actions=actions, auto_adjust=auto_adjust,
back_adjust=back_adjust, repair=repair, keepna=keepna,
proxy=proxy,
back_adjust=back_adjust, keepna=keepna, proxy=proxy,
rounding=rounding, timeout=timeout)
shared._DFS[ticker.upper()] = data
if progress:
@@ -145,7 +141,7 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
if ignore_tz:
for tkr in shared._DFS.keys():
if (shared._DFS[tkr] is not None) and (shared._DFS[tkr].shape[0] > 0):
if (shared._DFS[tkr] is not None) and (shared._DFS[tkr].shape[0]>0):
shared._DFS[tkr].index = shared._DFS[tkr].index.tz_localize(None)
if len(tickers) == 1:
@@ -195,34 +191,28 @@ def _realign_dfs():
@_multitasking.task
def _download_one_threaded(ticker, start=None, end=None,
auto_adjust=False, back_adjust=False, repair=False,
auto_adjust=False, back_adjust=False,
actions=False, progress=True, period="max",
interval="1d", prepost=False, proxy=None,
keepna=False, rounding=False, timeout=10):
try:
data = _download_one(ticker, start, end, auto_adjust, back_adjust, repair,
actions, period, interval, prepost, proxy, rounding,
keepna, timeout)
except Exception as e:
# glob try/except needed as current thread implementation breaks if exception is raised.
shared._DFS[ticker] = utils.empty_df()
shared._ERRORS[ticker] = repr(e)
else:
shared._DFS[ticker.upper()] = data
keepna=False, rounding=False, timeout=None):
data = _download_one(ticker, start, end, auto_adjust, back_adjust,
actions, period, interval, prepost, proxy, rounding,
keepna, timeout)
shared._DFS[ticker.upper()] = data
if progress:
shared._PROGRESS_BAR.animate()
def _download_one(ticker, start=None, end=None,
auto_adjust=False, back_adjust=False, repair=False,
auto_adjust=False, back_adjust=False,
actions=False, period="max", interval="1d",
prepost=False, proxy=None, rounding=False,
keepna=False, timeout=10):
return Ticker(ticker).history(
period=period, interval=interval,
start=start, end=end, prepost=prepost,
actions=actions, auto_adjust=auto_adjust,
back_adjust=back_adjust, repair=repair, proxy=proxy,
rounding=rounding, keepna=keepna, timeout=timeout,
debug=False, raise_errors=False # debug and raise_errors false to not log and raise errors in threads
)
keepna=False, timeout=None):
return Ticker(ticker).history(period=period, interval=interval,
start=start, end=end, prepost=prepost,
actions=actions, auto_adjust=auto_adjust,
back_adjust=back_adjust, proxy=proxy,
rounding=rounding, keepna=keepna, many=True,
timeout=timeout)

View File

@@ -1,118 +0,0 @@
import pandas as pd
from yfinance import utils
from yfinance.data import TickerData
class Analysis:
    """Analyst data read lazily from the "analysis" data stores.

    The first access to any property triggers one scrape through the supplied
    data object; `_already_scraped` guarantees the scrape is attempted once
    only. Results that could not be built stay `None`, except the revenue
    and EPS estimates, which fall back to empty DataFrames.
    """

    def __init__(self, data: "TickerData", proxy=None):
        self._data = data
        self.proxy = proxy

        self._earnings_trend = None
        self._analyst_trend_details = None
        self._analyst_price_target = None
        self._rev_est = None
        self._eps_est = None
        self._already_scraped = False

    @property
    def earnings_trend(self) -> pd.DataFrame:
        if self._earnings_trend is None:
            self._scrape(self.proxy)
        return self._earnings_trend

    @property
    def analyst_trend_details(self) -> pd.DataFrame:
        if self._analyst_trend_details is None:
            self._scrape(self.proxy)
        return self._analyst_trend_details

    @property
    def analyst_price_target(self) -> pd.DataFrame:
        if self._analyst_price_target is None:
            self._scrape(self.proxy)
        return self._analyst_price_target

    @property
    def rev_est(self) -> pd.DataFrame:
        if self._rev_est is None:
            self._scrape(self.proxy)
        return self._rev_est

    @property
    def eps_est(self) -> pd.DataFrame:
        if self._eps_est is None:
            self._scrape(self.proxy)
        return self._eps_est

    def _scrape(self, proxy):
        """Fetch the analysis data stores once and fill all result fields."""
        if self._already_scraped:
            return
        self._already_scraped = True

        # Analysis Data/Analyst Forecasts
        analysis_data = self._data.get_json_data_stores("analysis", proxy=proxy)
        try:
            analysis_data = analysis_data['QuoteSummaryStore']
        except KeyError:
            err_msg = "No analysis data found, symbol may be delisted"
            print('- %s: %s' % (self._data.ticker, err_msg))
            return

        earnings_trend = analysis_data.get('earningsTrend')
        if isinstance(earnings_trend, dict):
            try:
                analysis = pd.DataFrame(earnings_trend['trend'])
                analysis['endDate'] = pd.to_datetime(analysis['endDate'])
                analysis.set_index('period', inplace=True)
                analysis.index = analysis.index.str.upper()
                analysis.index.name = 'Period'
                analysis.columns = utils.camel2title(analysis.columns)

                # Columns holding dicts are expanded into one column per
                # dict key and then dropped from the final table.
                dict_cols = []
                for idx, row in analysis.iterrows():
                    for colname, colval in row.items():
                        if isinstance(colval, dict):
                            dict_cols.append(colname)
                            for k, v in colval.items():
                                new_colname = colname + ' ' + \
                                    utils.camel2title([k])[0]
                                analysis.loc[idx, new_colname] = v

                self._earnings_trend = analysis[[
                    c for c in analysis.columns if c not in dict_cols]]
            except Exception:
                # Best effort: leave earnings_trend as None when the store
                # does not have the expected layout.
                pass

        try:
            self._analyst_trend_details = pd.DataFrame(analysis_data['recommendationTrend']['trend'])
        except Exception:
            self._analyst_trend_details = None

        try:
            self._analyst_price_target = pd.DataFrame(analysis_data['financialData'], index=[0])[
                ['targetLowPrice', 'currentPrice', 'targetMeanPrice', 'targetHighPrice', 'numberOfAnalystOpinions']].T
        except Exception:
            self._analyst_price_target = None

        earnings_estimate = []
        revenue_estimate = []
        if self._analyst_trend_details is not None:
            # The trend rows may be absent even though recommendation data
            # exists; treat that as "no estimates" instead of raising.
            trend_rows = earnings_trend.get('trend') if isinstance(earnings_trend, dict) else None
            for key in trend_rows or []:
                try:
                    # Work on copies: the data stores come from a cached
                    # call and must not be modified in place.
                    earnings_dict = key['earningsEstimate'].copy()
                    earnings_dict['period'] = key['period']
                    earnings_dict['endDate'] = key['endDate']
                    earnings_estimate.append(earnings_dict)

                    revenue_dict = key['revenueEstimate'].copy()
                    revenue_dict['period'] = key['period']
                    revenue_dict['endDate'] = key['endDate']
                    revenue_estimate.append(revenue_dict)
                except Exception:
                    # Skip rows that do not carry the expected fields.
                    pass

            self._rev_est = pd.DataFrame(revenue_estimate)
            self._eps_est = pd.DataFrame(earnings_estimate)
        else:
            self._rev_est = pd.DataFrame()
            self._eps_est = pd.DataFrame()

View File

@@ -1,319 +0,0 @@
import datetime
import json
import pandas as pd
import numpy as np
from yfinance import utils
from yfinance.data import TickerData
from yfinance.exceptions import YFinanceDataException, YFinanceException
class Fundamentals:
    """Earnings and share-count data read lazily from the "financials" data stores.

    The data stores are fetched once (guarded by `_basics_already_scraped`)
    and shared by the earnings and shares scrapers. Statement time series
    are delegated to the `Financials` helper exposed as `financials`.
    """

    def __init__(self, data: "TickerData", proxy=None):
        self._data = data
        self.proxy = proxy

        self._earnings = None
        self._shares = None

        self._financials_data = None
        self._fin_data_quote = None
        self._basics_already_scraped = False
        self._financials = Financials(data)

    @property
    def financials(self) -> "Financials":
        return self._financials

    @property
    def earnings(self) -> dict:
        if self._earnings is None:
            self._scrape_earnings(self.proxy)
        return self._earnings

    @property
    def shares(self) -> pd.DataFrame:
        if self._shares is None:
            self._scrape_shares(self.proxy)
        return self._shares

    def _scrape_basics(self, proxy):
        """Fetch the "financials" data stores once and keep the quote summary."""
        if self._basics_already_scraped:
            return
        self._basics_already_scraped = True

        self._financials_data = self._data.get_json_data_stores('financials', proxy)
        try:
            self._fin_data_quote = self._financials_data['QuoteSummaryStore']
        except KeyError:
            err_msg = "No financials data found, symbol may be delisted"
            print('- %s: %s' % (self._data.ticker, err_msg))
            return None

    def _scrape_earnings(self, proxy):
        """Build the yearly / quarterly earnings tables (empty when unavailable)."""
        self._scrape_basics(proxy)
        # earnings
        self._earnings = {"yearly": pd.DataFrame(), "quarterly": pd.DataFrame()}
        if self._fin_data_quote is None:
            return
        earnings_store = self._fin_data_quote.get('earnings')
        if isinstance(earnings_store, dict):
            try:
                chart = earnings_store['financialsChart']
                # The currency is recorded on the result only; the data
                # stores come from a cached call and are left unmodified.
                self._earnings['financialCurrency'] = earnings_store.get('financialCurrency', 'USD')

                df = pd.DataFrame(chart['yearly']).set_index('date')
                df.columns = utils.camel2title(df.columns)
                df.index.name = 'Year'
                self._earnings['yearly'] = df

                df = pd.DataFrame(chart['quarterly']).set_index('date')
                df.columns = utils.camel2title(df.columns)
                df.index.name = 'Quarter'
                self._earnings['quarterly'] = df
            except Exception:
                # Best effort: keep whatever has been filled in so far.
                pass

    def _scrape_shares(self, proxy):
        """Build the per-year basic share count table; stays None on failure."""
        self._scrape_basics(proxy)
        # shares outstanding
        try:
            time_series = self._financials_data['QuoteTimeSeriesStore']['timeSeries']
            # keep only years with non None data
            available_shares = [shares_data
                                for shares_data in time_series['annualBasicAverageShares']
                                if shares_data]
            shares = pd.DataFrame(available_shares)
            # The year is taken from the first four characters of asOfDate.
            shares['Year'] = shares['asOfDate'].agg(lambda x: int(x[:4]))
            shares.set_index('Year', inplace=True)
            shares.drop(columns=['dataId', 'asOfDate',
                                 'periodType', 'currencyCode'], inplace=True)
            shares.rename(
                columns={'reportedValue': "BasicShares"}, inplace=True)
            self._shares = shares
        except Exception:
            # Best effort: missing or malformed share data leaves _shares unset.
            pass
class Financials:
def __init__(self, data: TickerData):
    """Store the data accessor and start with empty per-statement caches."""
    self._data = data

    # Per-statement dictionaries; the *_time_series ones are filled by the
    # get_*_time_series methods with one entry per requested frequency.
    self._income_time_series, self._income_scraped = {}, {}
    self._balance_sheet_time_series, self._balance_sheet_scraped = {}, {}
    self._cash_flow_time_series, self._cash_flow_scraped = {}, {}
def get_income_time_series(self, freq="yearly", proxy=None) -> pd.DataFrame:
    """Return the income statement time series for `freq`.

    The table is fetched on first use and then kept per frequency.

    :param freq: frequency key handed to `_fetch_time_series`.
    :param proxy: proxy setting forwarded to the fetch (used on a cache miss only).
    """
    res = self._income_time_series
    if freq not in res:
        # Forward the caller's proxy instead of discarding it.
        res[freq] = self._fetch_time_series("income", freq, proxy=proxy)
    return res[freq]
def get_balance_sheet_time_series(self, freq="yearly", proxy=None) -> pd.DataFrame:
    """Return the balance sheet time series for `freq`.

    The table is fetched on first use and then kept per frequency.

    :param freq: frequency key handed to `_fetch_time_series`.
    :param proxy: proxy setting forwarded to the fetch (used on a cache miss only).
    """
    res = self._balance_sheet_time_series
    if freq not in res:
        # Forward the caller's proxy instead of discarding it.
        res[freq] = self._fetch_time_series("balance-sheet", freq, proxy=proxy)
    return res[freq]
def get_cash_flow_time_series(self, freq="yearly", proxy=None) -> pd.DataFrame:
    """Return the cash flow time series for `freq`.

    The table is fetched on first use and then kept per frequency.

    :param freq: frequency key handed to `_fetch_time_series`.
    :param proxy: proxy setting forwarded to the fetch (used on a cache miss only).
    """
    res = self._cash_flow_time_series
    if freq not in res:
        # Forward the caller's proxy instead of discarding it.
        res[freq] = self._fetch_time_series("cash-flow", freq, proxy=proxy)
    return res[freq]
def _fetch_time_series(self, name, timescale, proxy=None):
    """Return the requested statement table, or an empty DataFrame.

    :param name: one of "income", "balance-sheet", "cash-flow".
    :param timescale: one of "yearly", "quarterly".
    :param proxy: proxy setting handed on to the table builder.
    :raises ValueError: if `name` or `timescale` is not an allowed value.
    """
    # Fetching time series preferred over scraping 'QuoteSummaryStore',
    # because it matches what Yahoo shows. But for some tickers returns nothing,
    # despite 'QuoteSummaryStore' containing valid data.

    allowed_names = ["income", "balance-sheet", "cash-flow"]
    allowed_timescales = ["yearly", "quarterly"]

    if name not in allowed_names:
        raise ValueError("Illegal argument: name must be one of: {}".format(allowed_names))
    if timescale not in allowed_timescales:
        # Report the allowed timescales (not the allowed names).
        raise ValueError("Illegal argument: timescale must be one of: {}".format(allowed_timescales))

    try:
        statement = self._create_financials_table(name, timescale, proxy)
        if statement is not None:
            return statement
    except YFinanceException as e:
        print(f"- {self._data.ticker}: Failed to create {name} financials table for reason: {repr(e)}")
    # No table could be built: fall back to an empty frame.
    return pd.DataFrame()
def _create_financials_table(self, name, timescale, proxy):
    """Build the statement table for `name`; returns None if the fetch fails."""
    # Yahoo stores the 'income' table internally under 'financials' key
    sub_page = "financials" if name == "income" else name

    keys = self._get_datastore_keys(sub_page, proxy)
    try:
        table = self.get_financials_time_series(timescale, keys, proxy)
    except Exception:
        # Any failure while fetching or reshaping yields None.
        return None
    return table
def _get_datastore_keys(self, sub_page, proxy) -> list:
    """Collect every value stored under a "key" entry of the FinancialTemplateStore.

    The store is walked depth-first through nested dicts and lists; a dict's
    own "key" value is emitted before the values found in its children.

    :raises YFinanceDataException: if the store is missing or holds no keys.
    """
    data_stores = self._data.get_json_data_stores(sub_page, proxy)

    # Step 1: get the keys:
    def _walk(node):
        if isinstance(node, dict):
            if "key" in node:
                yield node["key"]
            for child in node.values():
                yield from _walk(child)
        elif isinstance(node, list):
            for child in node:
                yield from _walk(child)

    try:
        keys = list(_walk(data_stores['FinancialTemplateStore']))
    except KeyError as e:
        raise YFinanceDataException("Parsing FinancialTemplateStore failed, reason: {}".format(repr(e)))

    if not keys:
        raise YFinanceDataException("No keys in FinancialTemplateStore")
    return keys
def get_financials_time_series(self, timescale, keys: list, proxy=None) -> pd.DataFrame:
    """Fetch the fundamentals time series for ``keys`` and tabulate it.

    Args:
        timescale: "yearly" or "quarterly"; translated to the prefix used in
            the request ("annual" / "quarterly").
        keys: Un-prefixed series names; also fixes the row order of the result.
        proxy: Proxy setting forwarded to ``self._data.cache_get``.

    Returns:
        DataFrame with one row per returned key (prefix stripped, ordered as in
        ``keys``) and one column per timestamp, newest first.

    Raises:
        KeyError: If ``timescale`` is not a known value or the response lacks
            the expected structure.
    """
    timescale_translation = {"yearly": "annual", "quarterly": "quarterly"}
    timescale = timescale_translation[timescale]

    # Step 2: construct url:
    ts_url_base = \
        "https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{0}?symbol={0}" \
        .format(self._data.ticker)

    url = ts_url_base + "&type=" + ",".join([timescale + k for k in keys])
    # Yahoo returns maximum 4 years or 5 quarters, regardless of start_dt:
    start_dt = datetime.datetime(2016, 12, 31)
    end = (datetime.datetime.now() + datetime.timedelta(days=366))
    url += "&period1={}&period2={}".format(int(start_dt.timestamp()), int(end.timestamp()))

    # Step 3: fetch and reshape data
    json_str = self._data.cache_get(url=url, proxy=proxy).text
    json_data = json.loads(json_str)
    data_raw = json_data["timeseries"]["result"]

    # Now reshape data into a table:
    # Step 1: get columns and index:
    timestamps = set()
    data_unpacked = {}
    for entry in data_raw:
        for k, v in entry.items():
            if k == "meta":
                # Metadata is not part of the table; tolerate entries without it.
                continue
            if k == "timestamp":
                timestamps.update(v)
            else:
                data_unpacked[k] = v
    dates = pd.to_datetime(sorted(timestamps), unit="s")
    df = pd.DataFrame(columns=dates, index=list(data_unpacked.keys()))
    for k, v in data_unpacked.items():
        # The dict is aligned on the date columns; dates with no value stay NaN.
        df.loc[k] = {pd.Timestamp(x["asOfDate"]): x["reportedValue"]["raw"] for x in v}

    df.index = df.index.str.replace("^" + timescale, "", regex=True)

    # Reorder table to match order on Yahoo website
    df = df.reindex([k for k in keys if k in df.index])
    df = df[sorted(df.columns, reverse=True)]

    return df
def get_income_scrape(self, freq="yearly", proxy=None) -> pd.DataFrame:
    """Return the scraped income statement for ``freq``, scraping it on first use.

    Results are memoised per frequency in ``self._income_scraped``.

    Args:
        freq: Reporting frequency, forwarded to ``_scrape``.
        proxy: Proxy setting forwarded to ``_scrape``.

    Returns:
        Whatever ``_scrape`` produced for this frequency.
    """
    res = self._income_scraped
    if freq not in res:
        # Forward the caller's proxy; it used to be discarded in favour of None.
        res[freq] = self._scrape("income", freq, proxy=proxy)
    return res[freq]
def get_balance_sheet_scrape(self, freq="yearly", proxy=None) -> pd.DataFrame:
    """Return the scraped balance sheet for ``freq``, scraping it on first use.

    Results are memoised per frequency in ``self._balance_sheet_scraped``.

    Args:
        freq: Reporting frequency, forwarded to ``_scrape``.
        proxy: Proxy setting forwarded to ``_scrape``.

    Returns:
        Whatever ``_scrape`` produced for this frequency.
    """
    res = self._balance_sheet_scraped
    if freq not in res:
        # Forward the caller's proxy; it used to be discarded in favour of None.
        res[freq] = self._scrape("balance-sheet", freq, proxy=proxy)
    return res[freq]
def get_cash_flow_scrape(self, freq="yearly", proxy=None) -> pd.DataFrame:
    """Return the scraped cash-flow statement for ``freq``, scraping it on first use.

    Results are memoised per frequency in ``self._cash_flow_scraped``.

    Args:
        freq: Reporting frequency, forwarded to ``_scrape``.
        proxy: Proxy setting forwarded to ``_scrape``.

    Returns:
        Whatever ``_scrape`` produced for this frequency.
    """
    res = self._cash_flow_scraped
    if freq not in res:
        # Forward the caller's proxy; it used to be discarded in favour of None.
        res[freq] = self._scrape("cash-flow", freq, proxy=proxy)
    return res[freq]
def _scrape(self, name, timescale, proxy=None):
    """Build one financial statement from the legacy scraped data.

    Args:
        name: One of "income", "balance-sheet", "cash-flow".
        timescale: One of "yearly", "quarterly".
        proxy: Proxy setting forwarded to ``_create_financials_table_old``.

    Returns:
        The table from ``_create_financials_table_old``; an empty DataFrame
        when that returns None or raises ``YFinanceException``.

    Raises:
        ValueError: If ``name`` or ``timescale`` is not an allowed value.
    """
    # Backup in case _fetch_time_series() fails to return data

    allowed_names = ["income", "balance-sheet", "cash-flow"]
    allowed_timescales = ["yearly", "quarterly"]

    if name not in allowed_names:
        raise ValueError("Illegal argument: name must be one of: {}".format(allowed_names))
    if timescale not in allowed_timescales:
        # Report the timescale options here (the message used to list the names).
        raise ValueError("Illegal argument: timescale must be one of: {}".format(allowed_timescales))

    try:
        statement = self._create_financials_table_old(name, timescale, proxy)

        if statement is not None:
            return statement
    except YFinanceException as e:
        print(f"- {self._data.ticker}: Failed to create financials table for {name} reason: {repr(e)}")
    return pd.DataFrame()
def _create_financials_table_old(self, name, timescale, proxy):
    """Tabulate a statement from the 'QuoteSummaryStore' of the financials page.

    Args:
        name: "cash-flow", "balance-sheet", or anything else for the income
            statement.
        timescale: "quarterly" selects the quarterly history; any other value
            selects the default history.
        proxy: Proxy setting forwarded to ``self._data.get_json_data_stores``.

    Returns:
        DataFrame with one row per line item (first letter upper-cased) and one
        column per statement end date.

    Raises:
        YFinanceDataException: If the expected store or statement is missing or
            empty.
    """
    data_stores = self._data.get_json_data_stores("financials", proxy)

    # Fetch raw data
    if "QuoteSummaryStore" not in data_stores:
        raise YFinanceDataException("Yahoo not returning legacy financials data")
    data = data_stores["QuoteSummaryStore"]

    if name == "cash-flow":
        key1 = "cashflowStatement"
        key2 = "cashflowStatements"
    elif name == "balance-sheet":
        key1 = "balanceSheet"
        key2 = "balanceSheetStatements"
    else:
        key1 = "incomeStatement"
        key2 = "incomeStatementHistory"
    key1 += "History"
    if timescale == "quarterly":
        key1 += "Quarterly"
    if key1 not in data or data[key1] is None or key2 not in data[key1]:
        raise YFinanceDataException(f"Yahoo not returning legacy {name} financials data")
    data = data[key1][key2]

    # Tabulate
    df = pd.DataFrame(data)
    if len(df) == 0:
        raise YFinanceDataException(f"Yahoo not returning legacy {name} financials data")
    df = df.drop(columns=['maxAge'])
    for col in df.columns:
        # '-' is treated as a missing-value placeholder
        df[col] = df[col].replace('-', np.nan)
    df.set_index('endDate', inplace=True)
    try:
        df.index = pd.to_datetime(df.index, unit='s')
    except ValueError:
        # Not epoch seconds; let pandas infer the date format instead.
        df.index = pd.to_datetime(df.index)
    df = df.T
    df.columns.name = ''
    df.index.name = 'Breakdown'
    # rename incorrect yahoo key
    df.rename(index={'treasuryStock': 'gainsLossesNotAffectingRetainedEarnings'}, inplace=True)

    # Upper-case first letter, leave rest unchanged:
    df.index = [s[0].upper()+s[1:] for s in df.index]

    return df

View File

@@ -1,66 +0,0 @@
import pandas as pd
from yfinance.data import TickerData
class Holders:
    """Lazily scraped holder tables (major, institutional, mutual fund) for one ticker."""

    _SCRAPE_URL_ = 'https://finance.yahoo.com/quote'

    def __init__(self, data: TickerData, proxy=None):
        self._data = data
        self.proxy = proxy

        # Filled in by _scrape(); None means "not available (yet)".
        self._major = None
        self._institutional = None
        self._mutualfund = None

    @property
    def major(self) -> pd.DataFrame:
        """First table of the holders page; triggers a scrape while unset."""
        if self._major is None:
            self._scrape(self.proxy)
        return self._major

    @property
    def institutional(self) -> pd.DataFrame:
        """Second table of the holders page; triggers a scrape while unset."""
        if self._institutional is None:
            self._scrape(self.proxy)
        return self._institutional

    @property
    def mutualfund(self) -> pd.DataFrame:
        """Third table of the holders page; triggers a scrape while unset."""
        if self._mutualfund is None:
            self._scrape(self.proxy)
        return self._mutualfund

    def _scrape(self, proxy):
        """Download the holders page and store up to three tables from it.

        Any failure while fetching or parsing is treated as "no tables".
        """
        ticker_url = "{}/{}".format(self._SCRAPE_URL_, self._data.ticker)
        try:
            resp = self._data.cache_get(ticker_url + '/holders', proxy)
            tables = pd.read_html(resp.text)
        except Exception:
            tables = []

        # Tables are assigned positionally; missing trailing tables keep their old value.
        for attr, table in zip(("_major", "_institutional", "_mutualfund"), tables):
            setattr(self, attr, table)

        # Same clean-up for both dated tables: parse dates, turn "12.3%" into 0.123.
        for table in (self._institutional, self._mutualfund):
            if table is None:
                continue
            if 'Date Reported' in table:
                table['Date Reported'] = pd.to_datetime(
                    table['Date Reported'])
            if '% Out' in table:
                table['% Out'] = table[
                    '% Out'].str.replace('%', '').astype(float) / 100

View File

@@ -1,210 +0,0 @@
import datetime
import json
import pandas as pd
from yfinance import utils
from yfinance.data import TickerData
class Quote:
    """Lazily scraped quote-summary data (info, sustainability, recommendations, calendar)."""

    def __init__(self, data: TickerData, proxy=None):
        self._data = data
        self.proxy = proxy

        # Filled in by _scrape(); None means "not available (yet)".
        self._info = None
        self._sustainability = None
        self._recommendations = None
        self._calendar = None

        # Each scrape runs at most once per instance, successful or not.
        self._already_scraped = False
        self._already_scraped_complementary = False

    @property
    def info(self) -> dict:
        """Merged info dict, or None when no summary store was found."""
        if self._info is None:
            self._scrape(self.proxy)
            self._scrape_complementary(self.proxy)

        return self._info

    @property
    def sustainability(self) -> pd.DataFrame:
        """ESG scores table, or None when unavailable."""
        if self._sustainability is None:
            self._scrape(self.proxy)
        return self._sustainability

    @property
    def recommendations(self) -> pd.DataFrame:
        """Upgrade/downgrade history, or None when unavailable."""
        if self._recommendations is None:
            self._scrape(self.proxy)
        return self._recommendations

    @property
    def calendar(self) -> pd.DataFrame:
        """Earnings calendar, or None when unavailable."""
        if self._calendar is None:
            self._scrape(self.proxy)
        return self._calendar

    def _scrape(self, proxy):
        """Populate info, sustainability, calendar and recommendations.

        Every section is best effort: a failure in one leaves that attribute
        as it was and does not stop the others.
        """
        if self._already_scraped:
            return
        self._already_scraped = True

        # get info and sustainability
        json_data = self._data.get_json_data_stores(proxy=proxy)
        try:
            quote_summary_store = json_data['QuoteSummaryStore']
        except KeyError:
            err_msg = "No summary info found, symbol may be delisted"
            print('- %s: %s' % (self._data.ticker, err_msg))
            return None

        # sustainability
        d = {}
        try:
            if isinstance(quote_summary_store.get('esgScores'), dict):
                # Keep only the scalar entries
                for item in quote_summary_store['esgScores']:
                    if not isinstance(quote_summary_store['esgScores'][item], (dict, list)):
                        d[item] = quote_summary_store['esgScores'][item]

                s = pd.DataFrame(index=[0], data=d)[-1:].T
                s.columns = ['Value']
                s.index.name = '%.f-%.f' % (
                    s[s.index == 'ratingYear']['Value'].values[0],
                    s[s.index == 'ratingMonth']['Value'].values[0])

                self._sustainability = s[~s.index.isin(
                    ['maxAge', 'ratingYear', 'ratingMonth'])]
        except Exception:
            pass

        self._info = {}
        try:
            items = ['summaryProfile', 'financialData', 'quoteType',
                     'defaultKeyStatistics', 'assetProfile', 'summaryDetail']
            for item in items:
                if isinstance(quote_summary_store.get(item), dict):
                    self._info.update(quote_summary_store[item])
        except Exception:
            pass

        # For ETFs, provide this valuable data: the top holdings of the ETF
        try:
            if 'topHoldings' in quote_summary_store:
                self._info.update(quote_summary_store['topHoldings'])
        except Exception:
            pass

        try:
            if not isinstance(quote_summary_store.get('summaryDetail'), dict):
                # For some reason summaryDetail did not give any results. The price dict
                # usually has most of the same info
                self._info.update(quote_summary_store.get('price', {}))
        except Exception:
            pass

        try:
            # Prefer the 'price' store; fall back to the market open already collected.
            self._info['regularMarketPrice'] = quote_summary_store.get('price', {}).get(
                'regularMarketPrice', self._info.get('regularMarketOpen', None))
        except Exception:
            pass

        try:
            self._info['preMarketPrice'] = quote_summary_store.get('price', {}).get(
                'preMarketPrice', self._info.get('preMarketPrice', None))
        except Exception:
            pass

        self._info['logo_url'] = ""
        try:
            if 'website' not in self._info:
                # No website: guess "<first word of shortName>.com"
                self._info['logo_url'] = 'https://logo.clearbit.com/%s.com' % \
                    self._info['shortName'].split(' ')[0].split(',')[0]
            else:
                domain = self._info['website'].split(
                    '://')[1].split('/')[0].replace('www.', '')
                self._info['logo_url'] = 'https://logo.clearbit.com/%s' % domain
        except Exception:
            pass

        # events
        try:
            cal = pd.DataFrame(quote_summary_store['calendarEvents']['earnings'])
            cal['earningsDate'] = pd.to_datetime(
                cal['earningsDate'], unit='s')
            self._calendar = cal.T
            self._calendar.index = utils.camel2title(self._calendar.index)
            self._calendar.columns = ['Value']
        except Exception:
            pass

        # analyst recommendations
        try:
            rec = pd.DataFrame(
                quote_summary_store['upgradeDowngradeHistory']['history'])
            rec['earningsDate'] = pd.to_datetime(
                rec['epochGradeDate'], unit='s')
            rec.set_index('earningsDate', inplace=True)
            rec.index.name = 'Date'
            rec.columns = utils.camel2title(rec.columns)
            self._recommendations = rec[[
                'Firm', 'To Grade', 'From Grade', 'Action']].sort_index()
        except Exception:
            pass

    def _scrape_complementary(self, proxy):
        """Add complementary key statistics to the info dict.

        Each requested key is stored in ``self._info`` with its most recent raw
        value, or None when the response holds no data for it.
        """
        if self._already_scraped_complementary:
            return
        self._already_scraped_complementary = True

        self._scrape(proxy)
        if self._info is None:
            return

        # Complementary key-statistics. For now just want 'trailing PEG ratio'
        keys = ("trailingPegRatio",)

        # Scraping the key-statistics page is very expensive for fetching just one
        # value; for one/few variables it is faster to query the endpoint directly.
        url = "https://query1.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{}?symbol={}".format(
            self._data.ticker, self._data.ticker)
        url += "".join("&type=" + k for k in keys)
        # Request 6 months of data
        now = datetime.datetime.now()
        url += "&period1={}".format(
            int((now - datetime.timedelta(days=365 // 2)).timestamp()))
        url += "&period2={}".format(int((now + datetime.timedelta(days=1)).timestamp()))

        json_str = self._data.cache_get(url=url, proxy=proxy).text
        json_data = json.loads(json_str)
        results = json_data["timeseries"]["result"]
        for k in keys:
            # Look the key up in every result entry instead of reusing a leaked loop
            # variable against entry 0 only.
            # NOTE(review): assumes at most one result entry carries a given key - confirm.
            series = next((entry[k] for entry in results if k in entry), None)
            if series:
                # Select most recent (last) raw value in list:
                self._info[k] = series[-1]["reportedValue"]["raw"]
            else:
                # Yahoo website prints N/A, indicates Yahoo lacks necessary data to calculate
                self._info[k] = None

View File

@@ -21,18 +21,21 @@
from __future__ import print_function
# import time as _time
import datetime as _datetime
import requests as _requests
import pandas as _pd
# import numpy as _np
# import json as _json
# import re as _re
from collections import namedtuple as _namedtuple
from . import utils
from .base import TickerBase
class Ticker(TickerBase):
def __init__(self, ticker, session=None):
super(Ticker, self).__init__(ticker, session=session)
self._expirations = {}
def __repr__(self):
return 'yfinance.Ticker object <%s>' % self.ticker
@@ -45,7 +48,17 @@ class Ticker(TickerBase):
url = "{}/v7/finance/options/{}?date={}".format(
self._base_url, self.ticker, date)
r = self._data.get(url=url, proxy=proxy).json()
# setup proxy in requests format
if proxy is not None:
if isinstance(proxy, dict) and "https" in proxy:
proxy = proxy["https"]
proxy = {"https": proxy}
r = _requests.get(
url=url,
proxies=proxy,
headers=utils.user_agent_headers
).json()
if len(r.get('optionChain', {}).get('result', [])) > 0:
for exp in r['optionChain']['result'][0]['expirationDates']:
self._expirations[_datetime.datetime.utcfromtimestamp(
@@ -102,43 +115,39 @@ class Ticker(TickerBase):
return self.get_isin()
@property
def major_holders(self) -> _pd.DataFrame:
def major_holders(self):
return self.get_major_holders()
@property
def institutional_holders(self) -> _pd.DataFrame:
def institutional_holders(self):
return self.get_institutional_holders()
@property
def mutualfund_holders(self) -> _pd.DataFrame:
def mutualfund_holders(self):
return self.get_mutualfund_holders()
@property
def dividends(self) -> _pd.Series:
def dividends(self):
return self.get_dividends()
@property
def capital_gains(self):
return self.get_capital_gains()
@property
def splits(self) -> _pd.Series:
def splits(self):
return self.get_splits()
@property
def actions(self) -> _pd.DataFrame:
def actions(self):
return self.get_actions()
@property
def shares(self) -> _pd.DataFrame :
def shares(self):
return self.get_shares()
@property
def info(self) -> dict:
def info(self):
return self.get_info()
@property
def calendar(self) -> _pd.DataFrame:
def calendar(self):
return self.get_calendar()
@property
@@ -146,63 +155,51 @@ class Ticker(TickerBase):
return self.get_recommendations()
@property
def earnings(self) -> _pd.DataFrame:
def earnings(self):
return self.get_earnings()
@property
def quarterly_earnings(self) -> _pd.DataFrame:
def quarterly_earnings(self):
return self.get_earnings(freq='quarterly')
@property
def income_stmt(self) -> _pd.DataFrame:
return self.get_income_stmt(pretty=True)
def financials(self):
return self.get_financials()
@property
def quarterly_income_stmt(self) -> _pd.DataFrame:
return self.get_income_stmt(pretty=True, freq='quarterly')
def quarterly_financials(self):
return self.get_financials(freq='quarterly')
@property
def balance_sheet(self) -> _pd.DataFrame:
return self.get_balance_sheet(pretty=True)
def balance_sheet(self):
return self.get_balancesheet()
@property
def quarterly_balance_sheet(self) -> _pd.DataFrame:
return self.get_balance_sheet(pretty=True, freq='quarterly')
def quarterly_balance_sheet(self):
return self.get_balancesheet(freq='quarterly')
@property
def balancesheet(self) -> _pd.DataFrame:
return self.balance_sheet
def balancesheet(self):
return self.get_balancesheet()
@property
def quarterly_balancesheet(self) -> _pd.DataFrame:
return self.quarterly_balance_sheet
def quarterly_balancesheet(self):
return self.get_balancesheet(freq='quarterly')
@property
def cashflow(self) -> _pd.DataFrame:
return self.get_cashflow(pretty=True, freq="yearly")
def cashflow(self):
return self.get_cashflow()
@property
def quarterly_cashflow(self) -> _pd.DataFrame:
return self.get_cashflow(pretty=True, freq='quarterly')
def quarterly_cashflow(self):
return self.get_cashflow(freq='quarterly')
@property
def recommendations_summary(self):
return self.get_recommendations_summary()
@property
def analyst_price_target(self) -> _pd.DataFrame:
return self.get_analyst_price_target()
@property
def revenue_forecasts(self) -> _pd.DataFrame:
return self.get_rev_forecast()
@property
def sustainability(self) -> _pd.DataFrame:
def sustainability(self):
return self.get_sustainability()
@property
def options(self) -> tuple:
def options(self):
if not self._expirations:
self._download_options()
return tuple(self._expirations.keys())
@@ -212,17 +209,13 @@ class Ticker(TickerBase):
return self.get_news()
@property
def earnings_trend(self) -> _pd.DataFrame:
return self.get_earnings_trend()
def analysis(self):
return self.get_analysis()
@property
def earnings_dates(self) -> _pd.DataFrame:
def earnings_history(self):
return self.get_earnings_history()
@property
def earnings_dates(self):
return self.get_earnings_dates()
@property
def earnings_forecasts(self) -> _pd.DataFrame:
return self.get_earnings_forecast()
@property
def history_metadata(self) -> dict:
return self.get_history_metadata()

View File

@@ -25,7 +25,7 @@ from . import Ticker, multi
# from collections import namedtuple as _namedtuple
class Tickers:
class Tickers():
def __repr__(self):
return 'yfinance.Tickers object <%s>' % ",".join(self.symbols)
@@ -46,31 +46,27 @@ class Tickers:
def history(self, period="1mo", interval="1d",
start=None, end=None, prepost=False,
actions=True, auto_adjust=True, repair=False,
proxy=None,
actions=True, auto_adjust=True, proxy=None,
threads=True, group_by='column', progress=True,
timeout=10, **kwargs):
timeout=None, **kwargs):
return self.download(
period, interval,
start, end, prepost,
actions, auto_adjust, repair,
proxy,
actions, auto_adjust, proxy,
threads, group_by, progress,
timeout, **kwargs)
def download(self, period="1mo", interval="1d",
start=None, end=None, prepost=False,
actions=True, auto_adjust=True, repair=False,
proxy=None,
actions=True, auto_adjust=True, proxy=None,
threads=True, group_by='column', progress=True,
timeout=10, **kwargs):
timeout=None, **kwargs):
data = multi.download(self.symbols,
start=start, end=end,
actions=actions,
auto_adjust=auto_adjust,
repair=repair,
period=period,
interval=interval,
prepost=prepost,

View File

@@ -22,9 +22,6 @@
from __future__ import print_function
import datetime as _datetime
import dateutil as _dateutil
from typing import Dict, Union, List, Optional
import pytz as _tz
import requests as _requests
import re as _re
@@ -33,18 +30,16 @@ import numpy as _np
import sys as _sys
import os as _os
import appdirs as _ad
import sqlite3 as _sqlite3
import atexit as _atexit
from threading import Lock
from pytz import UnknownTimeZoneError
mutex = Lock()
try:
import ujson as _json
except ImportError:
import json as _json
user_agent_headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
@@ -54,7 +49,7 @@ def is_isin(string):
def get_all_by_isin(isin, proxy=None, session=None):
if not (is_isin(isin)):
if not(is_isin(isin)):
raise ValueError("Invalid ISIN number")
from .base import _BASE_URL_
@@ -93,9 +88,7 @@ def get_news_by_isin(isin, proxy=None, session=None):
return data.get('news', {})
def empty_df(index=None):
if index is None:
index = []
def empty_df(index=[]):
empty = _pd.DataFrame(index=index, data={
'Open': _np.nan, 'High': _np.nan, 'Low': _np.nan,
'Close': _np.nan, 'Adj Close': _np.nan, 'Volume': _np.nan})
@@ -110,187 +103,48 @@ def empty_earnings_dates_df():
return empty
def build_template(data):
    '''
    build_template returns the details required to rebuild any of the yahoo finance financial statements in the same order as the yahoo finance webpage. The function is built to be used on the "FinancialTemplateStore" json which appears in any one of the three yahoo finance webpages: "/financials", "/cash-flow" and "/balance-sheet".

    The template is walked depth-first, so every line item is followed directly by its children, at any nesting depth.

    Returns:
        - template_ttm_order: The order that TTM (Trailing Twelve Month) figures should be listed in.
        - template_annual_order: The order that annual figures should be listed in.
        - template_order: The order that quarterlies should be in (note that quarterlies have no pre-fix - hence why this is required).
        - level_detail: The level of each individual line item. E.g. for the "/financials" webpage, "Total Revenue" is a level 0 item and is the summation of "Operating Revenue" and "Excise Taxes" which are level 1 items.
    '''
    template_ttm_order = []  # Save the TTM (Trailing Twelve Months) ordering to an object.
    template_annual_order = []  # Save the annual ordering to an object.
    template_order = []  # Save the ordering to an object (this can be utilized for quarterlies)
    level_detail = []  # Record the level of each line item of the income statement ("Operating Revenue" and "Excise Taxes" sum to return "Total Revenue" we need to keep track of this)

    def _walk(nodes, level):
        # Record each node, then descend into its children one level deeper.
        for node in nodes:
            key = node['key']
            template_ttm_order.append('trailing{}'.format(key))
            template_annual_order.append('annual{}'.format(key))
            template_order.append('{}'.format(key))
            level_detail.append(level)
            if 'children' in node:
                _walk(node['children'], level + 1)

    # Walk the json to retrieve the exact financial order
    _walk(data['template'], 0)
    return template_ttm_order, template_annual_order, template_order, level_detail
def get_html(url, proxy=None, session=None):
    """Fetch ``url`` with the module's user-agent headers and return the body text.

    A falsy ``session`` falls back to the ``requests`` module itself.
    """
    client = session if session else _requests
    response = client.get(url=url, proxies=proxy, headers=user_agent_headers)
    return response.text
def retreive_financial_details(data):
'''
retreive_financial_details returns all of the available financial details under the "QuoteTimeSeriesStore" for any of the following three yahoo finance webpages: "/financials", "/cash-flow" and "/balance-sheet".
def get_json(url, proxy=None, session=None):
session = session or _requests
html = session.get(url=url, proxies=proxy, headers=user_agent_headers).text
Returns:
- TTM_dicts: A dictionary full of all of the available Trailing Twelve Month figures, this can easily be converted to a pandas dataframe.
- Annual_dicts: A dictionary full of all of the available Annual figures, this can easily be converted to a pandas dataframe.
'''
TTM_dicts = [] # Save a dictionary object to store the TTM financials.
Annual_dicts = [] # Save a dictionary object to store the Annual financials.
for key in data['timeSeries']: # Loop through the time series data to grab the key financial figures.
try:
if len(data['timeSeries'][key]) > 0:
time_series_dict = {}
time_series_dict['index'] = key
for each in data['timeSeries'][key]: # Loop through the years
if each == None:
continue
else:
time_series_dict[each['asOfDate']] = each['reportedValue']
# time_series_dict["{}".format(each['asOfDate'])] = data['timeSeries'][key][each]['reportedValue']
if 'trailing' in key:
TTM_dicts.append(time_series_dict)
elif 'annual' in key:
Annual_dicts.append(time_series_dict)
except Exception as e:
pass
return TTM_dicts, Annual_dicts
if "QuoteSummaryStore" not in html:
html = session.get(url=url, proxies=proxy).text
if "QuoteSummaryStore" not in html:
return {}
json_str = html.split('root.App.main =')[1].split(
'(this)')[0].split(';\n}')[0].strip()
data = _json.loads(json_str)[
'context']['dispatcher']['stores']['QuoteSummaryStore']
# add data about Shares Outstanding for companies' tickers if they are available
try:
data['annualBasicAverageShares'] = _json.loads(
json_str)['context']['dispatcher']['stores'][
'QuoteTimeSeriesStore']['timeSeries']['annualBasicAverageShares']
except Exception:
pass
# return data
new_data = _json.dumps(data).replace('{}', 'null')
new_data = _re.sub(
r'\{[\'|\"]raw[\'|\"]:(.*?),(.*?)\}', r'\1', new_data)
return _json.loads(new_data)
def format_annual_financial_statement(level_detail, annual_dicts, annual_order, ttm_dicts=None, ttm_order=None):
    '''
    format_annual_financial_statement formats any annual financial statement

    Builds a frame from annual_dicts (each record needs an "index" entry), reorders the rows to annual_order and, when both TTM arguments are non-empty, merges the TTM figures in as extra "TTM ..." columns.

    Returns:
        - _statement: A fully formatted annual financial statement in pandas dataframe, indexed by (line item, level_detail), columns sorted descending, all-NaN rows dropped.
    '''
    Annual = _pd.DataFrame.from_dict(annual_dicts).set_index("index")
    Annual = Annual.reindex(annual_order)
    # Removes 'annual' from the row labels (no anchor in the pattern)
    Annual.index = Annual.index.str.replace(r'annual', '')

    # Note: balance sheet is the only financial statement with no ttm detail
    if (ttm_dicts not in [[], None]) and (ttm_order not in [[], None]):
        TTM = _pd.DataFrame.from_dict(ttm_dicts).set_index("index")
        TTM = TTM.reindex(ttm_order)
        # Add 'TTM' prefix to all column names, so if combined we can tell
        # the difference between actuals and TTM (similar to yahoo finance).
        TTM.columns = ['TTM ' + str(col) for col in TTM.columns]
        TTM.index = TTM.index.str.replace(r'trailing', '')
        _statement = Annual.merge(TTM, left_index=True, right_index=True)
    else:
        _statement = Annual

    # NOTE(review): `_statement.T.index` is the frame's *column* labels, not its
    # row labels - confirm this is intended (the quarterly variant passes `_statement.T`).
    _statement.index = camel2title(_statement.T.index)
    # level_detail is assigned positionally, so it must line up with the current row order
    _statement['level_detail'] = level_detail
    _statement = _statement.set_index([_statement.index, 'level_detail'])
    _statement = _statement[sorted(_statement.columns, reverse=True)]
    _statement = _statement.dropna(how='all')
    return _statement
def format_quarterly_financial_statement(_statement, level_detail, order):
    '''
    format_quarterly_financial_statements formats any quarterly financial statement

    Reorders the rows of _statement to order, title-cases the row labels and attaches level_detail as a second index level.

    Returns:
        - _statement: A fully formatted quarterly financial statement in pandas dataframe, columns sorted descending and converted to dates, all-NaN rows dropped.
    '''
    _statement = _statement.reindex(order)
    # NOTE(review): a DataFrame (not an Index) is passed here; iterating it yields
    # the transposed frame's column labels, i.e. the row labels of _statement.
    # Confirm the camel2title in use accepts a DataFrame.
    _statement.index = camel2title(_statement.T)
    # level_detail is assigned positionally, so it must line up with `order`
    _statement['level_detail'] = level_detail
    _statement = _statement.set_index([_statement.index, 'level_detail'])
    _statement = _statement[sorted(_statement.columns, reverse=True)]
    _statement = _statement.dropna(how='all')
    _statement.columns = _pd.to_datetime(_statement.columns).date
    return _statement
def camel2title(strings: List[str], sep: str = ' ', acronyms: Optional[List[str]] = None) -> List[str]:
    """Convert camelCase strings to Title Case words joined by ``sep``.

    Args:
        strings: Sized, indexable iterable of strings. Returned unchanged when empty.
        sep: Single non-alphanumeric character placed between words.
        acronyms: Optional upper-case acronyms to keep as-is (e.g. "EBITDA")
            instead of title-casing them. May be empty.

    Returns:
        List of converted strings.

    Raises:
        TypeError: If ``strings`` or ``acronyms`` is not an iterable of strings.
        ValueError: If ``sep`` is not a single permitted character, or an
            acronym contains anything but upper-case letters.
    """
    if isinstance(strings, str) or not hasattr(strings, '__iter__'):
        raise TypeError("camel2title() 'strings' argument must be iterable of strings")
    # Explicit length test: truth-testing would fail for array-like inputs.
    if len(strings) == 0:
        return strings
    if not isinstance(strings[0], str):
        raise TypeError("camel2title() 'strings' argument must be iterable of strings")
    if not isinstance(sep, str) or len(sep) != 1:
        raise ValueError(f"camel2title() 'sep' argument = '{sep}' must be single character")
    if _re.match("[a-zA-Z0-9]", sep):
        raise ValueError(f"camel2title() 'sep' argument = '{sep}' cannot be alpha-numeric")
    if _re.escape(sep) != sep and sep not in {' ', '-'}:
        # Permit some exceptions, I don't understand why they get escaped
        raise ValueError(f"camel2title() 'sep' argument = '{sep}' cannot be special character")

    # Insert 'sep' between lower-then-upper-case
    pat = "([a-z])([A-Z])"
    rep = rf"\g<1>{sep}\g<2>"

    if acronyms is None:
        return [_re.sub(pat, rep, s).title() for s in strings]

    # Handling acronyms requires more care. Assumes Yahoo returns acronym strings upper-case
    # Validate every element without indexing, so empty lists and sets are accepted.
    if isinstance(acronyms, str) or not hasattr(acronyms, '__iter__') \
            or not all(isinstance(a, str) for a in acronyms):
        raise TypeError("camel2title() 'acronyms' argument must be iterable of strings")
    for a in acronyms:
        if not _re.match("^[A-Z]+$", a):
            raise ValueError(f"camel2title() 'acronyms' argument must only contain upper-case, but '{a}' detected")

    strings = [_re.sub(pat, rep, s) for s in strings]

    # Insert 'sep' after acronyms
    for a in acronyms:
        pat = f"({a})([A-Z][a-z])"
        rep = rf"\g<1>{sep}\g<2>"
        strings = [_re.sub(pat, rep, s) for s in strings]

    # Apply str.title() to non-acronym words
    strings = [s.split(sep) for s in strings]
    strings = [[j.title() if j not in acronyms else j for j in s] for s in strings]
    strings = [sep.join(s) for s in strings]

    return strings
def camel2title(o):
    """Return the strings in ``o`` with a space at each lower-to-upper boundary, title-cased."""
    boundary = _re.compile("([a-z])([A-Z])")
    titles = []
    for name in o:
        spaced = boundary.sub(r"\g<1> \g<2>", name)
        titles.append(spaced.title())
    return titles
def _parse_user_dt(dt, exchange_tz):
if isinstance(dt, int):
# Should already be epoch, test with conversion:
## Should already be epoch, test with conversion:
_datetime.datetime.fromtimestamp(dt)
else:
# Convert str/date -> datetime, set tzinfo=exchange, get timestamp:
@@ -305,17 +159,7 @@ def _parse_user_dt(dt, exchange_tz):
return dt
def _interval_to_timedelta(interval):
    """Translate an interval string into an object that can be added to a datetime.

    Args:
        interval: "1mo", "1wk", or any string accepted by ``pandas.Timedelta``.

    Returns:
        A ``dateutil`` ``relativedelta`` of one calendar month for "1mo",
        otherwise a ``pandas.Timedelta``.
    """
    if interval == "1mo":
        # A bare `import dateutil` does not load the relativedelta submodule, and
        # the submodule is not callable; import the class explicitly.
        from dateutil.relativedelta import relativedelta
        return relativedelta(months=1)
    elif interval == "1wk":
        return _pd.Timedelta(days=7)
    else:
        return _pd.Timedelta(interval)
def auto_adjust(data):
col_order = data.columns
df = data.copy()
ratio = df["Close"] / df["Adj Close"]
df["Adj Open"] = df["Open"] / ratio
@@ -331,13 +175,13 @@ def auto_adjust(data):
"Adj Low": "Low", "Adj Close": "Close"
}, inplace=True)
return df[[c for c in col_order if c in df.columns]]
df = df[["Open", "High", "Low", "Close", "Volume"]]
return df[["Open", "High", "Low", "Close", "Volume"]]
def back_adjust(data):
""" back-adjusted data to mimic true historical prices """
col_order = data.columns
df = data.copy()
ratio = df["Adj Close"] / df["Close"]
df["Adj Open"] = df["Open"] * ratio
@@ -353,7 +197,7 @@ def back_adjust(data):
"Adj Low": "Low"
}, inplace=True)
return df[[c for c in col_order if c in df.columns]]
return df[["Open", "High", "Low", "Close", "Volume"]]
def parse_quotes(data):
@@ -385,8 +229,6 @@ def parse_quotes(data):
def parse_actions(data):
dividends = _pd.DataFrame(
columns=["Dividends"], index=_pd.DatetimeIndex([]))
capital_gains = _pd.DataFrame(
columns=["Capital Gains"], index=_pd.DatetimeIndex([]))
splits = _pd.DataFrame(
columns=["Stock Splits"], index=_pd.DatetimeIndex([]))
@@ -397,15 +239,8 @@ def parse_actions(data):
dividends.set_index("date", inplace=True)
dividends.index = _pd.to_datetime(dividends.index, unit="s")
dividends.sort_index(inplace=True)
dividends.columns = ["Dividends"]
if "capitalGains" in data["events"]:
capital_gains = _pd.DataFrame(
data=list(data["events"]["capitalGains"].values()))
capital_gains.set_index("date", inplace=True)
capital_gains.index = _pd.to_datetime(capital_gains.index, unit="s")
capital_gains.sort_index(inplace=True)
capital_gains.columns = ["Capital Gains"]
dividends.columns = ["Dividends"]
if "splits" in data["events"]:
splits = _pd.DataFrame(
@@ -414,232 +249,25 @@ def parse_actions(data):
splits.index = _pd.to_datetime(splits.index, unit="s")
splits.sort_index(inplace=True)
splits["Stock Splits"] = splits["numerator"] / \
splits["denominator"]
splits = splits[["Stock Splits"]]
splits["denominator"]
splits = splits["Stock Splits"]
return dividends, splits, capital_gains
def set_df_tz(df, interval, tz):
    """Convert the index of ``df`` to timezone ``tz`` and return ``df``.

    A timezone-naive index is treated as UTC before the conversion. The
    frame is modified in place and the same object is returned.
    ``interval`` is accepted but not used.
    """
    index_is_naive = df.index.tz is None
    if index_is_naive:
        df.index = df.index.tz_localize("UTC")
    df.index = df.index.tz_convert(tz)
    return df
def fix_Yahoo_returning_live_separate(quotes, interval, tz_exchange):
    """Fold Yahoo's separate "live" final row into the interval it belongs to.

    When the market is open, Yahoo can return today's data as its own row
    instead of including it in the current week/month/etc. row (this seems
    to depend on the exchange). If the last two rows fall in the same
    interval they are merged: Open is kept from the earlier row unless it is
    NaN, High/Low are the NaN-ignoring max/min, Close is taken from the later
    row and Volume is summed. For "1d" the earlier of two same-day rows is
    simply dropped.

    Args:
        quotes: price table with a datetime index and at least the columns
            Open, High, Low, Close and Volume; "Adj *" columns are merged
            too when present.
        interval: Yahoo interval string such as "1d", "1wk", "1mo", "3mo",
            or anything ``pandas.Timedelta`` can parse.
        tz_exchange: timezone used to decide which calendar day/week/month
            each of the last two rows belongs to.

    Returns:
        ``quotes`` itself when nothing has to change, otherwise a new frame
        with one row fewer. The input frame is never modified.
    """
    n = quotes.shape[0]
    if n < 2:
        return quotes

    last, prev = n - 1, n - 2
    dt1 = quotes.index[last]
    dt2 = quotes.index[prev]
    if quotes.index.tz is None:
        dt1 = dt1.tz_localize("UTC")
        dt2 = dt2.tz_localize("UTC")
    dt1 = dt1.tz_convert(tz_exchange)
    dt2 = dt2.tz_convert(tz_exchange)

    if interval == "1d":
        # Similar bug in daily data except most data is simply duplicated
        # - exception is volume, *slightly* greater on final row (and matches website)
        if dt1.date() == dt2.date():
            # Drop the second-to-last row by position so that identical
            # index labels cannot remove both rows.
            quotes = _pd.concat([quotes.iloc[:prev], quotes.iloc[last:]])
        return quotes

    if interval == "1wk":
        # Compare ISO (year, week) pairs: a week that spans New Year has two
        # calendar years but a single ISO year/week.
        same_interval = dt1.isocalendar()[:2] == dt2.isocalendar()[:2]
    elif interval == "1mo":
        same_interval = dt1.year == dt2.year and dt1.month == dt2.month
    elif interval == "3mo":
        same_interval = dt1.year == dt2.year and dt1.quarter == dt2.quarter
    else:
        same_interval = (dt1 - dt2) < _pd.Timedelta(interval)
    if not same_interval:
        return quotes

    # Build the result first and write into it, leaving the caller's frame alone.
    merged = quotes.iloc[:last].copy()

    def _pair(col):
        # (value in the earlier row, value in the live row), read by position.
        values = quotes[col]
        return values.iloc[prev], values.iloc[last]

    def _put(col, value):
        merged.iloc[prev, merged.columns.get_loc(col)] = value

    open_prev, open_last = _pair("Open")
    if _np.isnan(open_prev):
        _put("Open", open_last)
    # Note: nanmax() & nanmin() ignore NaNs
    _put("High", _np.nanmax(_pair("High")))
    _put("Low", _np.nanmin(_pair("Low")))
    _put("Close", _pair("Close")[1])
    if "Adj High" in merged.columns:
        _put("Adj High", _np.nanmax(_pair("Adj High")))
    if "Adj Low" in merged.columns:
        _put("Adj Low", _np.nanmin(_pair("Adj Low")))
    if "Adj Close" in merged.columns:
        _put("Adj Close", _pair("Adj Close")[1])
    volume_prev, volume_last = _pair("Volume")
    _put("Volume", volume_prev + volume_last)
    return merged
def safe_merge_dfs(df_main, df_sub, interval):
    """Join the single event column of ``df_sub`` onto the price table ``df_main``.

    The merge is attempted in up to three stages and stops as soon as the
    join keeps every event row:

    1. a plain index join;
    2. for "1wk" / "1mo" / "3mo", a join after backdating the event dates to
       the start of their week / month / quarter;
    3. a join after matching each event date by hand to the ``df_main`` row
       whose interval contains it.

    Events that still cannot be aligned are appended as extra rows with NaN
    prices for intraday and "1d" intervals. Neither input frame is modified.

    Args:
        df_main: price table indexed by interval start.
        df_sub: event table contributing exactly one column that is not in
            ``df_main`` (duplicates within an interval can only be aggregated
            for "Dividends" and "Stock Splits").
        interval: Yahoo interval string, e.g. "1d", "1wk", "1mo", "3mo", "1h".

    Returns:
        ``df_main`` joined with the event column.

    Raises:
        Exception: if ``df_sub`` is empty, does not contribute exactly one
            new column, contains duplicates that cannot be aggregated, or if
            events remain unmatched for a non-daily, non-intraday interval.
    """
    if df_sub.shape[0] == 0:
        raise Exception("No data to merge")

    data_cols = [c for c in df_sub.columns if c not in df_main]
    if len(data_cols) != 1:
        # Also covers "no new column", which used to surface as a bare IndexError.
        raise Exception("Expected 1 data col")
    data_col = data_cols[0]
    is_intraday = interval.endswith(('h', 'm'))

    def _reindex_events(df, new_index, data_col_name):
        # Work on a copy so the caller's frame keeps its own index and columns.
        df = df.copy()
        if len(new_index) == len(set(new_index)):
            # No duplicates, easy
            df.index = new_index
            return df

        df["_NewIndex"] = new_index
        # Duplicates present within periods but can aggregate
        if data_col_name == "Dividends":
            df = df.groupby("_NewIndex").sum()
        elif data_col_name == "Stock Splits":
            df = df.groupby("_NewIndex").prod()
        else:
            raise Exception("New index contains duplicates but unsure how to aggregate for '{}'".format(data_col_name))
        df.index.name = None
        if "_NewIndex" in df.columns:
            df = df.drop("_NewIndex", axis=1)
        return df

    def _join(events):
        # Returns the joined frame and whether the join dropped any event row.
        joined = df_main.join(events)
        lost = joined[data_col].notna().sum() < events.shape[0]
        return joined, lost

    def _match_to_main(dt_sub_i):
        # Find the df_main date whose interval contains dt_sub_i; returns
        # dt_sub_i unchanged when no match is found.
        main_index = df_main.index
        for j in range(len(main_index) - 1):
            dt_main_j0 = main_index[j]
            dt_main_j1 = main_index[j + 1]
            if (dt_main_j0 <= dt_sub_i) and (dt_sub_i < dt_main_j1):
                if is_intraday and not (dt_main_j0.date() == dt_sub_i.date() == dt_main_j1.date()):
                    # Intraday matches must also be on the same day
                    break
                return dt_main_j0

        # Not inside any [row, next row) span: compare against the last row.
        # NOTE(review): month/week/day/hour are compared without the
        # enclosing year/date, as in the original logic - confirm intended.
        last_main_dt = main_index[len(main_index) - 1]
        if interval == "1mo":
            fixed = last_main_dt.month == dt_sub_i.month
        elif interval == "3mo":
            fixed = (last_main_dt.year == dt_sub_i.year) and (last_main_dt.quarter == dt_sub_i.quarter)
        elif interval == "1wk":
            # With some specific start dates (e.g. around early Jan), Yahoo
            # messes up start-of-week, is Saturday not Monday. So also check
            # if same week another way
            fixed = (last_main_dt.week == dt_sub_i.week) or (
                (dt_sub_i >= last_main_dt)
                and (dt_sub_i - last_main_dt < _datetime.timedelta(weeks=1)))
        elif interval == "1d":
            fixed = last_main_dt.day == dt_sub_i.day
        elif interval == "1h" and last_main_dt.hour == dt_sub_i.hour:
            fixed = True
        elif is_intraday:
            fixed = (dt_sub_i >= last_main_dt) and (dt_sub_i - last_main_dt < _pd.to_timedelta(interval))
        else:
            fixed = False
        return last_main_dt if fixed else dt_sub_i

    # Stage 1: naive join
    df, data_lost = _join(df_sub)
    if not data_lost:
        return df

    # Stage 2: backdate all df_sub.index dates to start of week/month/quarter
    period_freq = {"1wk": 'W', "1mo": 'M', "3mo": 'Q'}.get(interval)
    if period_freq is not None:
        new_index = _pd.PeriodIndex(df_sub.index, freq=period_freq).to_timestamp()
        new_index = new_index.tz_localize(df.index.tz, ambiguous=True, nonexistent='shift_forward')
        df, data_lost = _join(_reindex_events(df_sub, new_index, data_col))
        if not data_lost:
            return df

    # Stage 3: manually check each original df_sub.index date against
    # df_main.index to find the matching interval
    new_index = [dt if dt in df_main.index else _match_to_main(dt) for dt in df_sub.index]
    events = _reindex_events(df_sub, new_index, data_col)
    df, data_lost = _join(events)
    if not data_lost:
        return df

    # Not always possible to match events with trading, e.g. when released
    # pre-market. So have to append to bottom with nan prices.
    # Should only be impossible with intra-day price data, but is also
    # possible with daily data when the dividend is very recent.
    if not (is_intraday or interval == "1d"):
        raise Exception("Lost data during merge despite all attempts to align data (see above)")
    f_missing = ~events.index.isin(df.index)
    events_missing = events[f_missing].copy()
    keys = {"Adj Open", "Open", "Adj High", "High", "Adj Low", "Low", "Adj Close",
            "Close"}.intersection(df.columns)
    events_missing[list(keys)] = _np.nan
    col_ordering = df.columns
    return _pd.concat([df, events_missing], sort=True)[col_ordering]
return dividends, splits
def fix_Yahoo_dst_issue(df, interval):
    """Shift daily/weekly timestamps that Yahoo reports just before midnight.

    These intervals should start at time 00:00. But for some combinations of
    date and timezone, Yahoo has time off by few hours (e.g. Brazil 23:00
    around Jan-2022). Suspect DST problem. The clue is (a) minutes=0 and
    (b) hour near 0. Such rows (hour 22 or 23, minute 0) are moved forward to
    the following midnight so that date conversion is not affected.

    Args:
        df: frame with a datetime index; its index is updated in place.
        interval: Yahoo interval string; only "1d", "1w" and "1wk" are adjusted.

    Returns:
        The same ``df`` object.
    """
    if interval in ["1d", "1w", "1wk"] and not df.empty:
        hours = df.index.hour
        f_pre_midnight = (df.index.minute == 0) & (hours.isin([22, 23]))
        # Hours missing to reach midnight for affected rows, zero elsewhere.
        dst_error_hours = _np.where(f_pre_midnight, 24 - hours, 0)
        # to_timedelta() replaces TimedeltaIndex(values, unit), whose unit
        # argument is deprecated in recent pandas.
        df.index += _pd.to_timedelta(dst_error_hours, unit='h')
    return df
def is_valid_timezone(tz: str) -> bool:
    """Return True when ``_tz.timezone`` accepts ``tz``, False when it reports an unknown zone."""
    try:
        _tz.timezone(tz)
    except UnknownTimeZoneError:
        recognised = False
    else:
        recognised = True
    return recognised
class ProgressBar:
def __init__(self, iterations, text='completed'):
self.text = text
@@ -680,172 +308,63 @@ class ProgressBar:
all_full = self.width - 2
num_hashes = int(round((percent_done / 100.0) * all_full))
self.prog_bar = '[' + self.fill_char * \
num_hashes + ' ' * (all_full - num_hashes) + ']'
num_hashes + ' ' * (all_full - num_hashes) + ']'
pct_place = (len(self.prog_bar) // 2) - len(str(percent_done))
pct_string = '%d%%' % percent_done
self.prog_bar = self.prog_bar[0:pct_place] + \
(pct_string + self.prog_bar[pct_place + len(pct_string):])
(pct_string + self.prog_bar[pct_place + len(pct_string):])
def __str__(self):
return str(self.prog_bar)
# ---------------------------------
# TimeZone cache related code
# ---------------------------------
# Simple file cache of ticker->timezone:
_cache_dp = None
def get_cache_dirpath():
    """Return the path of the "py-yfinance" cache folder.

    The folder lives under the location stored in ``_cache_dp`` when one has
    been set, otherwise under the user cache directory reported by ``_ad``.
    """
    base_dir = _ad.user_cache_dir() if _cache_dp is None else _cache_dp
    return _os.path.join(base_dir, "py-yfinance")
def set_tz_cache_location(dp):
    """Store ``dp`` in the module-level ``_cache_dp``.

    get_cache_dirpath() uses this value, when it is not None, as the parent
    directory of the "py-yfinance" cache folder.
    """
    global _cache_dp
    _cache_dp = dp
class _KVStore:
    """Simple SQLite-backed key/value store; key and value are strings. Should be thread safe."""
def cache_lookup_tkr_tz(tkr):
fp = _os.path.join(get_cache_dirpath(), "tkr-tz.csv")
if not _os.path.isfile(fp):
return None
def __init__(self, filename):
    """Open the SQLite database at ``filename`` and make sure the "kv" table exists."""
    # Mutex taken by the methods that write through the connection.
    self._cache_mutex = Lock()
    with self._cache_mutex:
        # check_same_thread=False lets threads other than the creator use this connection.
        self.conn = _sqlite3.connect(filename, timeout=10, check_same_thread=False)
        self.conn.execute('pragma journal_mode=wal')
        self.conn.execute('create table if not exists "kv" (key TEXT primary key, value TEXT) without rowid')
        self.conn.commit()
    # Close the connection when the interpreter exits.
    _atexit.register(self.close)
mutex.acquire()
df = _pd.read_csv(fp, index_col="Ticker")
mutex.release()
if tkr in df.index:
return df.loc[tkr,"Tz"]
else:
return None
def cache_store_tkr_tz(tkr,tz):
def close(self):
    """Close the database connection, if still open, and forget it."""
    if self.conn is not None:
        with self._cache_mutex:
            self.conn.close()
            # Mark as closed so a second call does nothing.
            self.conn = None
dp = get_cache_dirpath()
fp = _os.path.join(dp, "tkr-tz.csv")
mutex.acquire()
if not _os.path.isdir(dp):
_os.makedirs(dp)
if (not _os.path.isfile(fp)) and (tz is not None):
df = _pd.DataFrame({"Tz":[tz]}, index=[tkr])
df.index.name = "Ticker"
df.to_csv(fp)
def get(self, key: str) -> Union[str, None]:
    """Return the value stored for ``key``, or ``None`` when the key is absent."""
    # Take the same mutex as set()/bulk_set()/delete(): the connection is
    # opened with check_same_thread=False, so reads can otherwise overlap
    # with another thread's write on the same connection.
    with self._cache_mutex:
        row = self.conn.execute('select value from "kv" where key=?', (key,)).fetchone()
    return None if row is None else row[0]
def set(self, key: str, value: str) -> None:
    """Insert ``value`` for ``key``, replacing any existing row, and commit."""
    with self._cache_mutex:
        self.conn.execute('replace into "kv" (key, value) values (?,?)', (key, value))
        self.conn.commit()
def bulk_set(self, kvdata: Dict[str, str]):
    """Insert or replace every key/value pair of ``kvdata`` and commit once."""
    rows = tuple(kvdata.items())
    with self._cache_mutex:
        self.conn.executemany('replace into "kv" (key, value) values (?,?)', rows)
        self.conn.commit()
def delete(self, key: str):
    """Delete the row stored for ``key`` and commit."""
    with self._cache_mutex:
        self.conn.execute('delete from "kv" where key=?', (key,))
        self.conn.commit()
class _TzCacheException(Exception):
    """Raised when the TzCache folder cannot be created, read or written."""
class _TzCache:
"""Simple sqlite file cache of ticker->timezone"""
def __init__(self):
    """Prepare the cache folder; the database itself is opened on first use of ``tz_db``."""
    self._tz_db = None
    self._setup_cache_folder()
def _setup_cache_folder(self):
    """Create the cache folder if missing, otherwise check it is readable and writable.

    Raises:
        _TzCacheException: the folder cannot be created, or it exists but
            cannot be both read and written.
    """
    folder = self._db_dir
    if _os.path.isdir(folder):
        if not (_os.access(folder, _os.R_OK) and _os.access(folder, _os.W_OK)):
            raise _TzCacheException("Cannot read and write in TzCache folder: '{}'"
                                    .format(folder, ))
        return
    try:
        _os.makedirs(folder)
    except OSError as err:
        raise _TzCacheException("Error creating TzCache folder: '{}' reason: {}"
                                .format(folder, err))
def lookup(self, tkr):
    """Return what the key/value store holds for ``tkr`` (``tz_db.get``)."""
    return self.tz_db.get(tkr)
def store(self, tkr, tz):
else:
df = _pd.read_csv(fp, index_col="Ticker")
if tz is None:
self.tz_db.delete(tkr)
elif self.tz_db.get(tkr) is not None:
raise Exception("Tkr {} tz already in cache".format(tkr))
# Delete if in cache:
if tkr in df.index:
df.drop(tkr).to_csv(fp)
else:
self.tz_db.set(tkr, tz)
if tkr in df.index:
raise Exception("Tkr {} tz already in cache".format(tkr))
df.loc[tkr,"Tz"] = tz
df.to_csv(fp)
mutex.release()
@property
def _db_dir(self):
    """Path of the "py-yfinance" folder inside the module-level ``_cache_dir``."""
    # Only reads the module-level name, so no ``global`` statement is required.
    return _os.path.join(_cache_dir, "py-yfinance")
@property
def tz_db(self):
    """Key/value store holding the cache, opened on first access."""
    # lazy init
    if self._tz_db is None:
        # Assign before migrating: _migrate_cache_tkr_tz() reads self.tz_db
        # and must find the store already set.
        self._tz_db = _KVStore(_os.path.join(self._db_dir, "tkr-tz.db"))
        self._migrate_cache_tkr_tz()
    return self._tz_db
def _migrate_cache_tkr_tz(self):
    """Move entries of the old "tkr-tz.csv" cache into the SQLite store, then delete the CSV.

    Does nothing when the CSV file is absent. An empty CSV is simply removed.
    """
    legacy_csv = _os.path.join(self._db_dir, "tkr-tz.csv")
    if not _os.path.isfile(legacy_csv):
        return None

    try:
        legacy = _pd.read_csv(legacy_csv, index_col="Ticker")
    except _pd.errors.EmptyDataError:
        legacy = None

    if legacy is not None:
        # The file is only removed below if this import succeeds.
        self.tz_db.bulk_set(legacy.to_dict()['Tz'])
    _os.remove(legacy_csv)
class _TzCacheDummy:
    """Stand-in used when the tz cache is disabled: remembers nothing."""

    def lookup(self, tkr):
        """Always report a cache miss."""
        return None

    def store(self, tkr, tz):
        """Accept and discard the entry."""

    @property
    def tz_db(self):
        """There is no backing store."""
        return None
def get_tz_cache():
    """
    Return the timezone cache, creating it (and its folder) on the first call.
    If the cache cannot be set up, a dummy cache with the same interface as
    the real cache is installed instead and a notice is printed.
    """
    global _tz_cache
    # as this can be called from multiple threads, protect it.
    with _cache_init_lock:
        if _tz_cache is not None:
            return _tz_cache
        try:
            cache = _TzCache()
        except _TzCacheException as err:
            print("Failed to create TzCache, reason: {}".format(err))
            print("TzCache will not be used.")
            print("Tip: You can direct cache to use a different location with 'set_tz_cache_location(mylocation)'")
            cache = _TzCacheDummy()
        _tz_cache = cache
        return cache
# Directory in which the "py-yfinance" cache folder is created.
_cache_dir = _ad.user_cache_dir()
# Serialises creation of the cache object in get_tz_cache().
_cache_init_lock = Lock()
# The cache object; stays None until get_tz_cache() builds it.
_tz_cache = None
def set_tz_cache_location(cache_dir: str):
    """
    Sets the path to create the "py-yfinance" cache folder in.
    Useful if the default folder returned by "appdir.user_cache_dir()" is not writable.
    Must be called before cache is used (that is, before fetching tickers).
    :param cache_dir: Path to use for caches
    :return: None
    :raises AssertionError: if the cache has already been created
    """
    global _cache_dir, _tz_cache
    # Raise explicitly instead of using an ``assert`` statement so the check
    # still runs under ``python -O``; the exception type is unchanged.
    if _tz_cache is not None:
        raise AssertionError(
            "Time Zone cache already initialized, setting path must be done before cache is created")
    _cache_dir = cache_dir

View File

@@ -1 +1 @@
version = "0.2.0rc5"
version = "0.1.85"