Compare commits

...

71 Commits

Author SHA1 Message Date
Value Raider
8624216e21 Bump version to 0.2.20 2023-06-07 16:51:17 +01:00
ValueRaider
954e71d19c Update action versions in python-publish.yml
Recent release action generated deprecated error: "Node.js 12 actions are deprecated. Please update the following actions to use Node.js 16: actions/checkout@v2, actions/setup-python@v2."

So simply increasing the versions to match the latest GitHub usage docs; hopefully that works.
2023-06-07 16:48:11 +01:00
ValueRaider
5124059422 Bump version to 0.2.19 2023-06-07 13:28:32 +01:00
ValueRaider
d18cd6f42f Merge pull request #1549 from ranaroussi/dev
dev -> main
2023-06-07 13:23:39 +01:00
ValueRaider
c20211a06c Merge pull request #1547 from bveber/dev 2023-06-06 23:05:40 +01:00
bveber
cdfe7d0d2d add session to download 2023-06-06 01:06:18 -05:00
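`download()` now accepts the same `session` argument that `Ticker()` already took. A minimal sketch of using it (assuming `requests_cache` is installed; the tickers are arbitrary):
```
import requests_cache
import yfinance as yf

# The supplied session is reused for every request download() makes,
# so repeated calls for the same range are served from the local cache.
session = requests_cache.CachedSession(backend='memory')
df = yf.download(["MSFT", "AAPL"], period="1mo", session=session)
```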
ValueRaider
cac616a24c Dev version 0.2.19b4 2023-05-25 11:09:31 +01:00
ValueRaider
72a9e45e56 Merge pull request #1541 from ranaroussi/fix/download-logging
Bugfix in `download` logging tracebacks & boost tests
2023-05-25 10:58:52 +01:00
ValueRaider
4802199ae7 Bugfix in download logging tracebacks & boost tests
New logging in `download` stores the tracebacks, but the logic was faulty, this fixes that.
Also improves error handling in `download`.
Unit tests should have detected this so improved them:
- add/improve `download` tests
- disable tests that require Yahoo decryption (because it is broken)
- fix logging-related errors
- improve session use
2023-05-24 13:19:39 +01:00
ValueRaider
d9bfd29113 Delete 'Feature request' issue template - can't have nice things 2023-05-23 17:11:31 +01:00
ValueRaider
4711aab7b3 Merge pull request #1536 from ranaroussi/hotfix/tz-cache-migrate-error-again
Fix corrupt tkr-tz-csv halting code (again)
2023-05-23 16:44:35 +01:00
ValueRaider
30d20c1206 Fix corrupt tkr-tz-csv halting code (again) 2023-05-23 16:34:50 +01:00
ValueRaider
5c565c8934 bug_report.md: add instruction to post debug log
bug_report.md: add instruction to post debug log. Plus some minor edits.
2023-05-17 18:44:52 +01:00
ValueRaider
2fff97290b Merge pull request #1528 from ranaroussi/fix/tz-cache-migrate-error
Fix corrupt tkr-tz-csv halting code
2023-05-17 16:59:15 +01:00
ValueRaider
62ca5ab6be Fix corrupt tkr-tz-csv halting code 2023-05-17 15:05:38 +01:00
ValueRaider
83b177b7fb README.md - note on installing betas 2023-05-12 12:11:14 +01:00
ValueRaider
e8b99cb4e6 Dev version 0.2.19b3 2023-05-11 14:04:04 +01:00
ValueRaider
503d234020 Dev version 0.2.19b2 - add missing file 2023-05-11 14:03:38 +01:00
ValueRaider
144efd3b08 Dev version 0.2.19b2 2023-05-11 13:52:41 +01:00
ValueRaider
80fc91ffa9 Merge pull request #1523 from ranaroussi/fix/price-fixes
Price fixes
2023-05-11 13:51:03 +01:00
ValueRaider
9821197fd1 Merge pull request #1522 from ranaroussi/fix/logging-messages
Improve logging messages
2023-05-11 13:50:45 +01:00
ValueRaider
45b5cac33b Improve logging messages
Improve logging messages related to price data fetches:
- fix 'debug is deprecated' msg
- append user args to 'may be delisted' msg - interval & dates/period
- improve formatting of 'cannot reconstruct' msg
- hide errors in 'history()' while accessing 'fast_info[]'
2023-05-10 14:47:58 +01:00
ValueRaider
d755b8c7ff Fix 'history()' edge cases
Fix merging prices & events if prices empty.
If user requested price repair, ensure 'Repaired?' column always present.
2023-05-10 14:44:50 +01:00
ValueRaider
ab1042b4c9 Dev version 0.2.19b1 2023-05-04 22:14:34 +01:00
ValueRaider
8172fc02d2 Merge pull request #1514 from ranaroussi/feature/optimise-history
Optimise Ticker.history() - up to 2x faster
2023-05-04 22:08:40 +01:00
ValueRaider
836082280b Merge branch 'dev' into feature/optimise-history 2023-05-04 22:08:28 +01:00
ValueRaider
6a98c2eda6 Merge pull request #1493 from ranaroussi/feature/error-reporting
Deprecate 'debug' arg, improve 'logging' use
2023-05-04 22:06:54 +01:00
ValueRaider
46f55c8983 Add debug logging to 'history()' ; Improve logger fmt 2023-05-04 22:04:39 +01:00
ValueRaider
b025fef22c Optimise Ticker.history() - up to 2x faster
format_history_metadata() is expensive. Improvements:
- only perform full formatting if user requests metadata
- when pruning prepost data, only format 'tradingPeriods' entry of metadata

Other small optimisations to several internal prices processing methods.

Speedups:
dat.history(period='1wk', interval='1h', prepost=True)  # 2x
dat.history(period='1mo', interval='1h', prepost=True)  # 1.46x
dat.history(period='1wk', interval='1h')  # 1.15x
dat.history(period='1mo', interval='1h')  # 1.13x
dat.history(period='1y', interval='1d')  # 1.36x
dat.history(period='5y', interval='1d')  # 1.13x
2023-04-30 00:35:08 +01:00
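The figures above can be roughly reproduced with a timing loop; a sketch (the ticker is an arbitrary choice, and network variance will dominate unless a caching session is used):
```
import timeit
import yfinance as yf

dat = yf.Ticker("MSFT")
# Warm-up call so one-off setup (timezone lookup, metadata fetch)
# is excluded from the measurement.
dat.history(period='1wk', interval='1h', prepost=True)

t = timeit.timeit(
    lambda: dat.history(period='1wk', interval='1h', prepost=True),
    number=5)
print(f"{t / 5:.2f} s per call")
```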
ValueRaider
b96319dd64 Merge pull request #1504 from ranaroussi/hotfix/sql-exception
Fix timezone cache error: IntegrityError('NOT NULL constraint failed: kv.key')
2023-04-26 21:29:33 +01:00
ValueRaider
74b88dc62c Fix IntegrityError in timezone cache 2023-04-26 21:27:31 +01:00
ValueRaider
e3778465d8 Merge branch 'dev' into feature/error-reporting 2023-04-22 16:02:56 +01:00
ValueRaider
f82177ea2e Improve download() logging - group errors & tracebacks for cleaner STDOUT 2023-04-16 21:57:04 +01:00
ValueRaider
d30a2a0915 README.md: update 'News' 2023-04-16 21:29:57 +01:00
ValueRaider
142b1f3eb4 Merge pull request #1499 from ranaroussi/main
sync main -> dev
2023-04-16 19:08:50 +01:00
ValueRaider
afad7fcf0b Bump version to 0.2.18 2023-04-16 19:03:08 +01:00
ValueRaider
0baedbe4f5 Merge pull request #1498 from ranaroussi/hotfix/tz-cache-migrate-error
Fix handling Pandas parsing error during TZ-csv-cache migrate
2023-04-16 19:00:50 +01:00
ValueRaider
2c3c3dc8a9 Merge pull request #1496 from ranaroussi/hotfix/fast-info-np-not-found
Fix '_np not found', tweak 'info[] fixed' message
2023-04-16 18:59:38 +01:00
ValueRaider
8585dda77a Fix handling Pandas parsing error during TZ-csv-cache migrate 2023-04-16 15:09:28 +01:00
ValueRaider
3eb60fbd4a Fix '_np not found', tweak 'info[] fixed' message 2023-04-16 10:37:25 +01:00
ValueRaider
d3e2e71a6e Improve logging behaviour, particularly download()
- Use same logger across all files
- download():
  - write tracebacks to DEBUG
  - deprecate 'show_errors' argument
2023-04-15 17:29:07 +01:00
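With `show_errors` deprecated, verbosity is controlled through the standard `logging` module. A sketch of surfacing the tracebacks now written at DEBUG level (the second symbol is a deliberately invalid placeholder):
```
import logging
import yfinance as yf

# Failed-download tracebacks are logged at DEBUG, so lower the
# 'yfinance' logger's threshold to see them.
logging.basicConfig()
logging.getLogger('yfinance').setLevel(logging.DEBUG)

yf.download(["MSFT", "NOT-A-SYMBOL"], period="1mo")
```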
ValueRaider
4937c933a2 Deprecate 'debug' arg, improve 'logging' use 2023-04-15 16:47:39 +01:00
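The deprecation message itself names the replacement; as a sketch, silencing `history()` errors without the old `debug=False`:
```
import logging
import yfinance as yf

# Replacement for the deprecated Ticker.history(debug=False):
logging.getLogger('yfinance').setLevel(logging.CRITICAL)
yf.Ticker("MSFT").history(period="1mo")
```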
ValueRaider
045cd45893 Bump version to 0.2.17 2023-04-10 21:55:21 +01:00
ValueRaider
6d52cb6e3a Merge pull request #1488 from steven9909/fix_localize
Fix tzinfo missing attribute
2023-04-10 21:51:54 +01:00
steven9909
a24c0e1391 fix tzinfo missing attribute
tzinfo does not have a localize attribute, so it is replaced with a timestamp in UTC
2023-04-10 16:04:58 -04:00
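For context: `localize()` is a `pytz`-specific method, so a plain `datetime.timezone` object has no such attribute. A standard-library sketch of roughly the failure this avoids:
```
import datetime as dt

naive = dt.datetime.utcnow()
utc = dt.timezone.utc
# utc.localize(naive) would raise AttributeError: 'datetime.timezone'
# object has no attribute 'localize'. A portable alternative:
aware = naive.replace(tzinfo=utc)
```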
ValueRaider
1e941fc86a Merge branch 'main' into dev 2023-04-09 23:45:37 +01:00
ValueRaider
0b52e8f118 Bump version to 0.2.16 2023-04-09 23:42:50 +01:00
ValueRaider
d45bed3d53 Fix 'fast_info deprecated' msg appearing at Ticker() init 2023-04-09 23:41:44 +01:00
ValueRaider
e7a3848f69 Merge pull request #1477 from ranaroussi/feature/price-repair-tweaks
Price repair: add 'Repaired?' column, and a bugfix
2023-04-09 21:01:49 +01:00
ValueRaider
3d29ced428 Merge pull request #1474 from garrettladley/leverage-dict-and-list-comps
Leverage dict & list comprehensions in yfinance/tickers.py
2023-04-06 13:26:08 +01:00
garrettladley
2fe5a0a361 leveraged dict & list comps in yfinance/tickers.py 2023-04-05 18:55:47 -04:00
Value Raider
a649b40dc9 Price repair: add 'Repaired?' column, and a bugfix
Price repair changes:
- if user requests price repair, add 'Repaired?' bool column showing what rows were repaired.
- fix price repair requesting <1d data beyond Yahoo's limit.
- fix logger messages
2023-04-03 21:27:04 +01:00
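A usage sketch of the new column (ticker and period borrowed from the repair unit tests):
```
import yfinance as yf

dat = yf.Ticker("PNL.L")
df = dat.history(period="5y", interval="1wk", repair=True)

# Rows that yfinance reconstructed from finer-grained data are flagged True:
print(df[df["Repaired?"]])
```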
ValueRaider
a01edee4fa Merge pull request #1476 from ranaroussi/main
main -> dev
2023-04-03 21:20:50 +01:00
Value Raider
e89e190d11 Merge branch 'main' into dev 2023-03-21 19:05:56 +00:00
ValueRaider
a236270389 Merge pull request #1457 from ranaroussi/fix/price-fixes-various
Various fixes to price data processing
2023-03-21 18:59:13 +00:00
Value Raider
b5dca4941a Order history_metadata['tradingPeriods'] DF sensibly 2023-03-20 21:18:53 +00:00
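A sketch of reaching that DataFrame (assuming the `history_metadata` property whose lazy formatting appears in the diff below; the ticker is arbitrary):
```
import yfinance as yf

dat = yf.Ticker("MSFT")
dat.history(period="1wk", interval="1h", prepost=True)  # populates metadata
tps = dat.history_metadata["tradingPeriods"]  # a DataFrame after formatting
print(tps.head())
```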
Value Raider
6b71ba977c Various fixes to price data processing
- move drop-duplicates to before repair
- fix 'format_history_metadata()' processing 'regular' column
- fix Pandas & Numpy warnings
2023-03-20 21:10:45 +00:00
ValueRaider
6c70b866c7 Merge pull request #1423 from flaviovs/no-print
No print
2023-02-20 20:07:23 +00:00
Value Raider
bd696fb4db Beta version 0.2.13b1 2023-02-17 17:04:39 +00:00
Value Raider
d13aafa633 Replace more prints with logging, mostly in 'price repair' 2023-02-17 12:01:11 +00:00
Flávio Veloso Soares
00823f6fa6 Remove redundant logging text 2023-02-16 16:53:33 -08:00
Flávio Veloso Soares
21fdba9021 Replace warnings print() with warnings.warn(...) calls 2023-02-16 16:53:33 -08:00
Flávio Veloso Soares
972547ca8c Replace prints with logging module 2023-02-16 16:53:33 -08:00
ValueRaider
23b400f0fb Merge pull request #1421 from ranaroussi/fix/missing-price-history-errors
Improve handling missing price history
2023-02-16 14:22:10 +00:00
Value Raider
a1a385196b Improve handling missing price history
Fix fast_info[] dying if metadata incomplete/missing ; Price repair fix when no fine data available ; Fix _fix_unit_mixups() report
2023-02-14 17:31:14 +00:00
ValueRaider
a0046439d1 Merge pull request #1400 from ranaroussi/feature/improve-performance
Optimise recent new features in `history`
2023-02-12 14:58:36 +00:00
ValueRaider
63a8476575 Merge pull request #1417 from ranaroussi/main
main -> dev
2023-02-12 14:56:19 +00:00
ValueRaider
0f5db35b6e Optimise Ticker._reconstruct_intervals_batch() (slightly) 2023-02-05 18:16:08 +00:00
ValueRaider
7c6742a60a Optimise Ticker._fix_unit_mixups() 2023-02-05 15:15:56 +00:00
ValueRaider
36ace8017d Optimise Ticker._fix_zeroes() 2023-02-05 13:46:57 +00:00
ValueRaider
ead0bce96e Optimise format_history_metadata() 2023-02-04 22:56:49 +00:00
20 changed files with 1155 additions and 864 deletions

View File

@@ -9,7 +9,7 @@ assignees: ''
# IMPORTANT
If you want help, you got to read this first, follow the instructions.
# Read and follow these instructions carefully. Help us help you.
### Are you up-to-date?
@@ -25,19 +25,20 @@ and comparing against [PIP](https://pypi.org/project/yfinance/#history).
### Does Yahoo actually have the data?
Are you spelling ticker *exactly* same as Yahoo?
Are you spelling symbol *exactly* same as Yahoo?
Then visit `finance.yahoo.com` and confirm they have the data you want. Maybe your ticker was delisted, or your expectations of `yfinance` are wrong.
Then visit `finance.yahoo.com` and confirm they have the data you want. Maybe your symbol was delisted, or your expectations of `yfinance` are wrong.
### Are you spamming Yahoo?
Yahoo Finance free service has rate-limiting depending on request type - roughly 60/minute for prices, 10/minute for info. Once limit hit, Yahoo can delay, block, or return bad data. Not a `yfinance` bug.
Yahoo Finance free service has rate-limiting depending on request type - roughly 60/minute for prices, 10/minute for info. Once limit hit, Yahoo can delay, block, or return bad data -> not a `yfinance` bug.
### Still think it's a bug?
Delete this default message (all of it) and submit your bug report here, providing the following as best you can:
**Delete these instructions** and replace with your bug report, providing the following as best you can:
- Simple code that reproduces your problem, that we can copy-paste-run
- Exception message with full traceback, or proof `yfinance` returning bad data
- `yfinance` version and Python version
- Operating system type
- Simple code that reproduces your problem, that we can copy-paste-run.
- Run code with [debug logging enabled](https://github.com/ranaroussi/yfinance/tree/dev#logging) and post the full output.
- If you think `yfinance` returning bad data, give us proof.
- `yfinance` version and Python version.
- Operating system type.

View File

@@ -1,14 +0,0 @@
---
name: Feature request
about: Request a new feature
title: ''
labels: ''
assignees: ''
---
**Describe the problem**
**Describe the solution**
**Additional context**

View File

@@ -13,9 +13,9 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: '3.x'
- name: Install dependencies

View File

@@ -1,6 +1,27 @@
Change Log
===========
0.2.20
------
Switch to `logging` module #1493 #1522 #1541
Price history:
- optimise #1514
- fixes #1523
- fix TZ-cache corruption #1528
0.2.18
------
Fix 'fast_info' error '_np not found' #1496
Fix bug in timezone cache #1498
0.2.17
------
Fix prices error with Pandas 2.0 #1488
0.2.16
------
Fix 'fast_info deprecated' msg appearing at Ticker() init
0.2.15
------
Restore missing Ticker.info keys #1480

View File

@@ -42,10 +42,19 @@ Yahoo! finance API is intended for personal use only.**
---
## News [2023-01-27]
Since December 2022 Yahoo has been encrypting the web data that `yfinance` scrapes for non-market data. Fortunately the decryption keys are available, although Yahoo moved/changed them several times hence `yfinance` breaking several times. `yfinance` is now better prepared for any future changes by Yahoo.
## News
Why is Yahoo doing this? We don't know. Is it to stop scrapers? Maybe, so we've implemented changes to reduce load on Yahoo. In December we rolled out version 0.2 with optimised scraping. ~Then in 0.2.6 introduced `Ticker.fast_info`, providing much faster access to some `info` elements wherever possible e.g. price stats and forcing users to switch (sorry but we think necessary). `info` will continue to exist for as long as there are elements without a fast alternative.~ `info` now fixed and much faster than before.
### 2023-01-27
Since December 2022 Yahoo has been encrypting the web data that `yfinance` scrapes for non-price data. Price data still works. Fortunately the decryption keys are available, although Yahoo moved/changed them several times hence `yfinance` breaking several times. `yfinance` is now better prepared for any future changes by Yahoo.
Why is Yahoo doing this? We don't know. Is it to stop scrapers? Maybe, so we've implemented changes to reduce load on Yahoo. In December we rolled out version 0.2 with optimised scraping. Then in 0.2.6 introduced `Ticker.fast_info`, providing much faster access to some `Ticker.info` elements wherever possible e.g. price stats and forcing users to switch (sorry but we think necessary).
### 2023-02-07
Yahoo is now regularly changing their decryption key, breaking `yfinance` decryption. It is technically possible to extract the key from their webpage, but this is not implemented because it is difficult; see [discussion in the issue thread](https://github.com/ranaroussi/yfinance/issues/1407).
### 2023-04-09
Fixed `Ticker.info`
## Quick Start
@@ -186,6 +195,17 @@ yf.download(tickers = "SPY AAPL", # list of tickers
Review the [Wiki](https://github.com/ranaroussi/yfinance/wiki) for more options and detail.
### Logging
`yfinance` now uses the `logging` module. To control the detail of printed messages you simply change the level:
```
import logging
logger = logging.getLogger('yfinance')
logger.setLevel(logging.ERROR) # default: only print errors
logger.setLevel(logging.CRITICAL) # disable printing
logger.setLevel(logging.DEBUG) # verbose: print errors & debug info
```
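Because this is the standard `logging` module, ordinary handler configuration applies too, e.g. routing `yfinance` messages to a file (standard-library sketch; the filename is arbitrary):
```
import logging
# Write all messages, including yfinance DEBUG output, to a file:
logging.basicConfig(filename='yfinance.log', level=logging.DEBUG)
```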
### Smarter scraping
To use a custom `requests` session (for example to cache calls to the
@@ -271,6 +291,11 @@ Install `yfinance` using `pip`:
$ pip install yfinance --upgrade --no-cache-dir
```
Test new features by installing betas, provide feedback in [corresponding Discussion](https://github.com/ranaroussi/yfinance/discussions):
``` {.sourceCode .bash}
$ pip install yfinance --upgrade --no-cache-dir --pre
```
To install `yfinance` using `conda`, see
[this](https://anaconda.org/ranaroussi/yfinance).

View File

@@ -1,5 +1,5 @@
{% set name = "yfinance" %}
{% set version = "0.2.15" %}
{% set version = "0.2.20" %}
package:
name: "{{ name|lower }}"

View File

@@ -15,6 +15,9 @@ Sanity check for most common library uses all working
import yfinance as yf
import unittest
import logging
logging.basicConfig(level=logging.DEBUG)
symbols = ['MSFT', 'IWO', 'VFINX', '^GSPC', 'BTC-USD']
tickers = [yf.Ticker(symbol) for symbol in symbols]

View File

@@ -7,3 +7,32 @@ _src_dp = _parent_dp
sys.path.insert(0, _src_dp)
import yfinance
# Optional: see the exact requests that are made during tests:
# import logging
# logging.basicConfig(level=logging.DEBUG)
# Setup a session to rate-limit and cache persistently:
import datetime as _dt
import os
import appdirs as _ad
from requests import Session
from requests_cache import CacheMixin, SQLiteCache
from requests_ratelimiter import LimiterMixin, MemoryQueueBucket
class CachedLimiterSession(CacheMixin, LimiterMixin, Session):
pass
from pyrate_limiter import Duration, RequestRate, Limiter
history_rate = RequestRate(1, Duration.SECOND*2)
limiter = Limiter(history_rate)
session_gbl = CachedLimiterSession(
limiter=limiter,
bucket_class=MemoryQueueBucket,
backend=SQLiteCache(os.path.join(_ad.user_cache_dir(), "py-yfinance", "unittests-cache"),
expire_after=_dt.timedelta(hours=1)),
)
# Use this instead if only want rate-limiting:
# from requests_ratelimiter import LimiterSession
# session_gbl = LimiterSession(limiter=limiter)

View File

@@ -1,4 +1,5 @@
from .context import yfinance as yf
from .context import session_gbl
import unittest
@@ -7,15 +8,11 @@ import pytz as _tz
import numpy as _np
import pandas as _pd
import requests_cache
class TestPriceHistory(unittest.TestCase):
session = None
@classmethod
def setUpClass(cls):
cls.session = requests_cache.CachedSession(backend='memory')
cls.session = session_gbl
@classmethod
def tearDownClass(cls):
@@ -34,11 +31,23 @@ class TestPriceHistory(unittest.TestCase):
f = df.index.time == _dt.time(0)
self.assertTrue(f.all())
def test_download(self):
tkrs = ["BHP.AX", "IMP.JO", "BP.L", "PNL.L", "INTC"]
intervals = ["1d", "1wk", "1mo"]
for interval in intervals:
df = yf.download(tkrs, period="5y", interval=interval)
f = df.index.time == _dt.time(0)
self.assertTrue(f.all())
df_tkrs = df.columns.levels[1]
self.assertEqual(sorted(tkrs), sorted(df_tkrs))
def test_duplicatingHourly(self):
tkrs = ["IMP.JO", "BHG.JO", "SSW.JO", "BP.L", "INTC"]
for tkr in tkrs:
dat = yf.Ticker(tkr, session=self.session)
tz = dat._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
tz = dat._get_ticker_tz(proxy=None, timeout=None)
dt_utc = _tz.timezone("UTC").localize(_dt.datetime.utcnow())
dt = dt_utc.astimezone(_tz.timezone(tz))
@@ -58,7 +67,7 @@ class TestPriceHistory(unittest.TestCase):
test_run = False
for tkr in tkrs:
dat = yf.Ticker(tkr, session=self.session)
tz = dat._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
tz = dat._get_ticker_tz(proxy=None, timeout=None)
dt_utc = _tz.timezone("UTC").localize(_dt.datetime.utcnow())
dt = dt_utc.astimezone(_tz.timezone(tz))
@@ -84,7 +93,7 @@ class TestPriceHistory(unittest.TestCase):
test_run = False
for tkr in tkrs:
dat = yf.Ticker(tkr, session=self.session)
tz = dat._get_ticker_tz(debug_mode=False, proxy=None, timeout=None)
tz = dat._get_ticker_tz(proxy=None, timeout=None)
dt = _tz.timezone(tz).localize(_dt.datetime.now())
if dt.date().weekday() not in [1, 2, 3, 4]:
@@ -401,7 +410,7 @@ class TestPriceRepair(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.session = requests_cache.CachedSession(backend='memory')
cls.session = session_gbl
@classmethod
def tearDownClass(cls):
@@ -479,6 +488,9 @@ class TestPriceRepair(unittest.TestCase):
f_1 = ratio == 1
self.assertTrue((f_100 | f_1).all())
self.assertTrue("Repaired?" in df_repaired.columns)
self.assertFalse(df_repaired["Repaired?"].isna().any())
def test_repair_100x_weekly_preSplit(self):
# PNL.L has a stock-split in 2022. Sometimes requesting data before 2022 is not split-adjusted.
@@ -536,6 +548,9 @@ class TestPriceRepair(unittest.TestCase):
f_1 = ratio == 1
self.assertTrue((f_100 | f_1).all())
self.assertTrue("Repaired?" in df_repaired.columns)
self.assertFalse(df_repaired["Repaired?"].isna().any())
def test_repair_100x_daily(self):
tkr = "PNL.L"
dat = yf.Ticker(tkr, session=self.session)
@@ -578,6 +593,9 @@ class TestPriceRepair(unittest.TestCase):
f_1 = ratio == 1
self.assertTrue((f_100 | f_1).all())
self.assertTrue("Repaired?" in df_repaired.columns)
self.assertFalse(df_repaired["Repaired?"].isna().any())
def test_repair_zeroes_daily(self):
tkr = "BBIL.L"
dat = yf.Ticker(tkr, session=self.session)
@@ -605,6 +623,9 @@ class TestPriceRepair(unittest.TestCase):
for c in ["Open", "Low", "High", "Close"]:
self.assertTrue(_np.isclose(repaired_df[c], correct_df[c], rtol=1e-8).all())
self.assertTrue("Repaired?" in repaired_df.columns)
self.assertFalse(repaired_df["Repaired?"].isna().any())
def test_repair_zeroes_hourly(self):
tkr = "INTC"
dat = yf.Ticker(tkr, session=self.session)
@@ -636,13 +657,8 @@ class TestPriceRepair(unittest.TestCase):
print(repaired_df[c] - correct_df[c])
raise
self.assertTrue("Repaired?" in repaired_df.columns)
self.assertFalse(repaired_df["Repaired?"].isna().any())
if __name__ == '__main__':
unittest.main()
# # Run tests sequentially:
# import inspect
# test_src = inspect.getsource(TestPriceHistory)
# unittest.TestLoader.sortTestMethodsUsing = lambda _, x, y: (
# test_src.index(f"def {x}") - test_src.index(f"def {y}")
# )
# unittest.main(verbosity=2)

File diff suppressed because it is too large

View File

@@ -21,6 +21,7 @@
from __future__ import print_function
import warnings
import time as _time
import datetime as _datetime
import dateutil as _dateutil
@@ -43,17 +44,20 @@ from .scrapers.holders import Holders
from .scrapers.quote import Quote, FastInfo
import json as _json
import logging
logger = utils.get_yf_logger()
_BASE_URL_ = 'https://query2.finance.yahoo.com'
_SCRAPE_URL_ = 'https://finance.yahoo.com/quote'
_ROOT_URL_ = 'https://finance.yahoo.com'
class TickerBase:
def __init__(self, ticker, session=None):
self.ticker = ticker.upper()
self.session = session
self._history = None
self._history_metadata = None
self._history_metadata_formatted = False
self._base_url = _BASE_URL_
self._scrape_url = _SCRAPE_URL_
self._tz = None
@@ -78,7 +82,7 @@ class TickerBase:
self._quote = Quote(self._data)
self._fundamentals = Fundamentals(self._data)
self._fast_info = FastInfo(self)
self._fast_info = None
def stats(self, proxy=None):
ticker_url = "{}/{}".format(self._scrape_url, self.ticker)
@@ -91,7 +95,8 @@ class TickerBase:
start=None, end=None, prepost=False, actions=True,
auto_adjust=True, back_adjust=False, repair=False, keepna=False,
proxy=None, rounding=False, timeout=10,
debug=True, raise_errors=False) -> pd.DataFrame:
debug=None, # deprecated
raise_errors=False) -> pd.DataFrame:
"""
:Parameters:
period : str
@@ -132,26 +137,34 @@ class TickerBase:
seconds. (Can also be a fraction of a second e.g. 0.01)
Default is 10 seconds.
debug: bool
If passed as False, will suppress
error message printing to console.
If passed as False, will suppress message printing to console.
DEPRECATED, will be removed in future version
raise_errors: bool
If True, then raise errors as
exceptions instead of printing to console.
If True, then raise errors as Exceptions instead of logging.
"""
if debug is not None:
if debug:
utils.print_once(f"yfinance: Ticker.history(debug={debug}) argument is deprecated and will be removed in future version. Do this instead: logging.getLogger('yfinance').setLevel(logging.ERROR)")
logger.setLevel(logging.ERROR)
else:
utils.print_once(f"yfinance: Ticker.history(debug={debug}) argument is deprecated and will be removed in future version. Do this instead to suppress error messages: logging.getLogger('yfinance').setLevel(logging.CRITICAL)")
logger.setLevel(logging.CRITICAL)
start_user = start
end_user = end
if start or period is None or period.lower() == "max":
# Check can get TZ. Fail => probably delisted
tz = self._get_ticker_tz(debug, proxy, timeout)
tz = self._get_ticker_tz(proxy, timeout)
if tz is None:
# Every valid ticker has a timezone. Missing = problem
err_msg = "No timezone found, symbol may be delisted"
shared._DFS[self.ticker] = utils.empty_df()
shared._ERRORS[self.ticker] = err_msg
if debug:
if raise_errors:
raise Exception('%s: %s' % (self.ticker, err_msg))
else:
print('- %s: %s' % (self.ticker, err_msg))
if raise_errors:
raise Exception('%s: %s' % (self.ticker, err_msg))
else:
logger.error('%s: %s' % (self.ticker, err_msg))
return utils.empty_df()
if end is None:
@@ -187,20 +200,25 @@ class TickerBase:
#if the ticker is MUTUALFUND or ETF, then get capitalGains events
params["events"] = "div,splits,capitalGains"
params_pretty = dict(params)
tz = self._get_ticker_tz(proxy, timeout)
for k in ["period1", "period2"]:
if k in params_pretty:
params_pretty[k] = str(_pd.Timestamp(params[k], unit='s').tz_localize("UTC").tz_convert(tz))
logger.debug('%s: %s' % (self.ticker, "Yahoo GET parameters: " + str(params_pretty)))
# Getting data from json
url = "{}/v8/finance/chart/{}".format(self._base_url, self.ticker)
data = None
get_fn = self._data.get
if end is not None:
end_dt = _pd.Timestamp(end, unit='s').tz_localize("UTC")
dt_now = _pd.Timestamp.utcnow()
data_delay = _datetime.timedelta(minutes=30)
if end_dt+data_delay <= dt_now:
# Date range in past so safe to fetch through cache:
get_fn = self._data.cache_get
try:
get_fn = self._data.get
if end is not None:
end_dt = _pd.Timestamp(end, unit='s').tz_localize("UTC")
dt_now = end_dt.tzinfo.localize(_datetime.datetime.utcnow())
data_delay = _datetime.timedelta(minutes=30)
if end_dt+data_delay <= dt_now:
# Date range in past so safe to fetch through cache:
get_fn = self._data.cache_get
data = get_fn(
url=url,
params=params,
@@ -220,9 +238,27 @@ class TickerBase:
self._history_metadata = data["chart"]["result"][0]["meta"]
except Exception:
self._history_metadata = {}
self._history_metadata = utils.format_history_metadata(self._history_metadata)
err_msg = "No data found for this date range, symbol may be delisted"
intraday = params["interval"][-1] in ("m", 'h')
err_msg = "No price data found, symbol may be delisted"
if start or period is None or period.lower() == "max":
err_msg += f' ({params["interval"]} '
if start_user is not None:
err_msg += f'{start_user}'
elif not intraday:
err_msg += f'{_pd.Timestamp(start, unit="s").tz_localize("UTC").tz_convert(tz).date()}'
else:
err_msg += f'{_pd.Timestamp(start, unit="s").tz_localize("UTC").tz_convert(tz)}'
err_msg += ' -> '
if end_user is not None:
err_msg += f'{end_user})'
elif not intraday:
err_msg += f'{_pd.Timestamp(end, unit="s").tz_localize("UTC").tz_convert(tz).date()})'
else:
err_msg += f'{_pd.Timestamp(end, unit="s").tz_localize("UTC").tz_convert(tz)})'
else:
err_msg += f' (period={period})'
fail = False
if data is None or not type(data) is dict:
fail = True
@@ -243,11 +279,10 @@ class TickerBase:
if fail:
shared._DFS[self.ticker] = utils.empty_df()
shared._ERRORS[self.ticker] = err_msg
if debug:
if raise_errors:
raise Exception('%s: %s' % (self.ticker, err_msg))
else:
print('%s: %s' % (self.ticker, err_msg))
if raise_errors:
raise Exception('%s: %s' % (self.ticker, err_msg))
else:
logger.error('%s: %s' % (self.ticker, err_msg))
return utils.empty_df()
# parse quotes
@@ -261,15 +296,16 @@ class TickerBase:
except Exception:
shared._DFS[self.ticker] = utils.empty_df()
shared._ERRORS[self.ticker] = err_msg
if debug:
if raise_errors:
raise Exception('%s: %s' % (self.ticker, err_msg))
else:
print('%s: %s' % (self.ticker, err_msg))
if raise_errors:
raise Exception('%s: %s' % (self.ticker, err_msg))
else:
logger.error('%s: %s' % (self.ticker, err_msg))
return shared._DFS[self.ticker]
logger.debug(f'{self.ticker}: yfinance received OHLC data: {quotes.index[0]} -> {quotes.index[-1]}')
# 2) fix weird bug with Yahoo! - returning 60m for 30m bars
if interval.lower() == "30m":
logger.debug(f'{self.ticker}: resampling 30m OHLC from 15m')
quotes2 = quotes.resample('30T')
quotes = _pd.DataFrame(index=quotes2.last().index, data={
'Open': quotes2['Open'].first(),
@@ -299,7 +335,12 @@ class TickerBase:
quotes = utils.fix_Yahoo_returning_live_separate(quotes, params["interval"], tz_exchange)
intraday = params["interval"][-1] in ("m", 'h')
if not prepost and intraday and "tradingPeriods" in self._history_metadata:
quotes = utils.fix_Yahoo_returning_prepost_unrequested(quotes, params["interval"], self._history_metadata)
tps = self._history_metadata["tradingPeriods"]
if not isinstance(tps, pd.DataFrame):
self._history_metadata = utils.format_history_metadata(self._history_metadata, tradingPeriodsOnly=True)
tps = self._history_metadata["tradingPeriods"]
quotes = utils.fix_Yahoo_returning_prepost_unrequested(quotes, params["interval"], tps)
logger.debug(f'{self.ticker}: OHLC after cleaning: {quotes.index[0]} -> {quotes.index[-1]}')
# actions
dividends, splits, capital_gains = utils.parse_actions(data["chart"]["result"][0])
@@ -313,13 +354,14 @@ class TickerBase:
if capital_gains is not None:
capital_gains = utils.set_df_tz(capital_gains, interval, tz_exchange)
if start is not None:
startDt = quotes.index[0].floor('D')
if dividends is not None:
dividends = dividends.loc[startDt:]
if capital_gains is not None:
capital_gains = capital_gains.loc[startDt:]
if splits is not None:
splits = splits.loc[startDt:]
if not quotes.empty:
startDt = quotes.index[0].floor('D')
if dividends is not None:
dividends = dividends.loc[startDt:]
if capital_gains is not None:
capital_gains = capital_gains.loc[startDt:]
if splits is not None:
splits = splits.loc[startDt:]
if end is not None:
endDt = _pd.Timestamp(end, unit='s').tz_localize(tz)
if dividends is not None:
@@ -361,9 +403,13 @@ class TickerBase:
df.loc[df["Capital Gains"].isna(),"Capital Gains"] = 0
else:
df["Capital Gains"] = 0.0
logger.debug(f'{self.ticker}: OHLC after combining events: {quotes.index[0]} -> {quotes.index[-1]}')
df = df[~df.index.duplicated(keep='first')] # must do before repair
if repair==True or repair=="silent":
# Do this before auto/back adjust
logger.debug(f'{self.ticker}: checking OHLC for repairs ...')
df = self._fix_zeroes(df, interval, tz_exchange, prepost, silent=(repair=="silent"))
df = self._fix_unit_mixups(df, interval, tz_exchange, prepost, silent=(repair=="silent"))
@@ -380,11 +426,10 @@ class TickerBase:
err_msg = "back_adjust failed with %s" % e
shared._DFS[self.ticker] = utils.empty_df()
shared._ERRORS[self.ticker] = err_msg
if debug:
if raise_errors:
raise Exception('%s: %s' % (self.ticker, err_msg))
else:
print('%s: %s' % (self.ticker, err_msg))
if raise_errors:
raise Exception('%s: %s' % (self.ticker, err_msg))
else:
logger.error('%s: %s' % (self.ticker, err_msg))
if rounding:
df = _np.round(df, data[
@@ -396,15 +441,17 @@ class TickerBase:
else:
df.index.name = "Date"
# duplicates and missing rows cleanup
df = df[~df.index.duplicated(keep='first')]
self._history = df.copy()
# missing rows cleanup
if not actions:
df = df.drop(columns=["Dividends", "Stock Splits", "Capital Gains"], errors='ignore')
if not keepna:
mask_nan_or_zero = (df.isna() | (df == 0)).all(axis=1)
df = df.drop(mask_nan_or_zero.index[mask_nan_or_zero])
logger.debug(f'{self.ticker}: yfinance returning OHLC: {df.index[0]} -> {df.index[-1]}')
return df
# ------------------------
@@ -418,9 +465,6 @@ class TickerBase:
# Reconstruct values in df using finer-grained price data. Delimiter marks what to reconstruct
debug = False
# debug = True
if interval[1:] in ['d', 'wk', 'mo']:
# Interday data always includes pre & post
prepost = True
@@ -444,8 +488,9 @@ class TickerBase:
sub_interval = nexts[interval]
td_range = itds[interval]
else:
print("WARNING: Have not implemented repair for '{}' interval. Contact developers".format(interval))
raise Exception("why here")
logger.warning("Have not implemented price repair for '%s' interval. Contact developers", interval)
if not "Repaired?" in df.columns:
df["Repaired?"] = False
return df
df = df.sort_index()
@@ -461,25 +506,28 @@ class TickerBase:
m -= _datetime.timedelta(days=1) # allow space for 1-day padding
min_dt = _pd.Timestamp.utcnow() - m
min_dt = min_dt.tz_convert(df.index.tz).ceil("D")
if debug:
print(f"- min_dt={min_dt} interval={interval} sub_interval={sub_interval}")
logger.debug(f"min_dt={min_dt} interval={interval} sub_interval={sub_interval}")
if min_dt is not None:
f_recent = df.index >= min_dt
f_repair_rows = f_repair_rows & f_recent
if not f_repair_rows.any():
if debug:
print("data too old to repair")
logger.info("Data too old to repair")
if not "Repaired?" in df.columns:
df["Repaired?"] = False
return df
dts_to_repair = df.index[f_repair_rows]
indices_to_repair = _np.where(f_repair_rows)[0]
if len(dts_to_repair) == 0:
if debug:
print("dts_to_repair[] is empty")
logger.info("Nothing needs repairing (dts_to_repair[] empty)")
if not "Repaired?" in df.columns:
df["Repaired?"] = False
return df
df_v2 = df.copy()
if not "Repaired?" in df_v2.columns:
df_v2["Repaired?"] = False
f_good = ~(df[price_cols].isna().any(axis=1))
f_good = f_good & (df[price_cols].to_numpy()!=tag).all(axis=1)
df_good = df[f_good]
@@ -502,8 +550,7 @@ class TickerBase:
grp_max_size = _datetime.timedelta(days=5) # allow 2 days for buffer below
else:
grp_max_size = _datetime.timedelta(days=30)
if debug:
print("- grp_max_size =", grp_max_size)
logger.debug(f"grp_max_size = {grp_max_size}")
for i in range(1, len(dts_to_repair)):
ind = indices_to_repair[i]
dt = dts_to_repair[i]
@@ -514,12 +561,11 @@ class TickerBase:
last_dt = dt
last_ind = ind
if debug:
print("Repair groups:")
for g in dts_groups:
print(f"- {g[0]} -> {g[-1]}")
logger.debug("Repair groups:")
for g in dts_groups:
logger.debug(f"- {g[0]} -> {g[-1]}")
# Add some good data to each group, so can calibrate later:
# Add some good data to each group, so can calibrate prices later:
for i in range(len(dts_groups)):
g = dts_groups[i]
g0 = g[0]
@@ -540,24 +586,30 @@ class TickerBase:
n_fixed = 0
for g in dts_groups:
df_block = df[df.index.isin(g)]
if debug:
print("- df_block:")
print(df_block)
logger.debug("df_block:")
logger.debug(df_block)
start_dt = g[0]
start_d = start_dt.date()
reject = False
if sub_interval == "1h" and (_datetime.date.today() - start_d) > _datetime.timedelta(days=729):
# Don't bother requesting more price data, Yahoo will reject
if debug:
print(f"- Don't bother requesting {sub_interval} price data, Yahoo will reject")
continue
reject = True
elif sub_interval in ["30m", "15m"] and (_datetime.date.today() - start_d) > _datetime.timedelta(days=59):
reject = True
if reject:
# Don't bother requesting more price data, Yahoo will reject
if debug:
print(f"- Don't bother requesting {sub_interval} price data, Yahoo will reject")
msg = f"Cannot reconstruct {interval} block starting"
if intraday:
msg += f" {start_dt}"
else:
msg += f" {start_d}"
msg += ", too old, Yahoo will reject request for finer-grain data"
logger.warning(msg)
continue
td_1d = _datetime.timedelta(days=1)
end_dt = g[-1]
end_d = end_dt.date() + td_1d
if interval in "1wk":
fetch_start = start_d - td_range # need previous week too
fetch_end = g[-1].date() + td_range
@@ -574,16 +626,33 @@ class TickerBase:
if intraday:
fetch_start = fetch_start.date()
fetch_end = fetch_end.date()+td_1d
if debug:
print(f"- fetching {sub_interval} prepost={prepost} {fetch_start}->{fetch_end}")
if min_dt is not None:
fetch_start = max(min_dt.date(), fetch_start)
logger.debug(f"Fetching {sub_interval} prepost={prepost} {fetch_start}->{fetch_end}")
r = "silent" if silent else True
df_fine = self.history(start=fetch_start, end=fetch_end, interval=sub_interval, auto_adjust=False, actions=False, prepost=prepost, repair=r, keepna=True)
if df_fine is None or df_fine.empty:
if not silent:
print("YF: WARNING: Cannot reconstruct because Yahoo not returning data in interval")
msg = f"Cannot reconstruct {interval} block starting"
if intraday:
msg += f" {start_dt}"
else:
msg += f" {start_d}"
msg += ", too old, Yahoo is rejecting request for finer-grain data"
logger.warning(msg)
continue
# Discard the buffer
df_fine = df_fine.loc[g[0] : g[-1]+itds[sub_interval]-_datetime.timedelta(milliseconds=1)]
df_fine = df_fine.loc[g[0] : g[-1]+itds[sub_interval]-_datetime.timedelta(milliseconds=1)].copy()
if df_fine.empty:
if not silent:
msg = f"Cannot reconstruct {interval} block range"
if intraday:
msg += f" {start_dt}->{end_dt}"
else:
msg += f" {start_d}->{end_d}"
msg += ", Yahoo not returning finer-grain data within range"
logger.warning(msg)
continue
df_fine["ctr"] = 0
if interval == "1wk":
@@ -616,25 +685,22 @@ class TickerBase:
new_index = _np.append([df_fine.index[0]], df_fine.index[df_fine["intervalID"].diff()>0])
df_new.index = new_index
if debug:
print("- df_new:")
print(df_new)
logger.debug("df_new:")
logger.debug(df_new)
# Calibrate! Check whether 'df_fine' has different split-adjustment.
# If different, then adjust to match 'df'
common_index = _np.intersect1d(df_block.index, df_new.index)
if len(common_index) == 0:
# Can't calibrate so don't attempt repair
if debug:
print("Can't calibrate so don't attempt repair")
logger.warning(f"Can't calibrate {interval} block starting {start_d} so aborting repair")
continue
df_new_calib = df_new[df_new.index.isin(common_index)][price_cols].to_numpy()
df_block_calib = df_block[df_block.index.isin(common_index)][price_cols].to_numpy()
calib_filter = (df_block_calib != tag)
if not calib_filter.any():
# Can't calibrate so don't attempt repair
if debug:
print("Can't calibrate so don't attempt repair")
logger.warning(f"Can't calibrate {interval} block starting {start_d} so aborting repair")
continue
# Avoid divide-by-zero warnings:
for j in range(len(price_cols)):
@@ -650,8 +716,7 @@ class TickerBase:
weights = _np.tile(weights, len(price_cols)) # 1D -> 2D
weights = weights[calib_filter] # flatten
ratio = _np.average(ratios, weights=weights)
if debug:
print(f"- price calibration ratio (raw) = {ratio}")
logger.debug(f"Price calibration ratio (raw) = {ratio}")
ratio_rcp = round(1.0 / ratio, 1)
ratio = round(ratio, 1)
if ratio == 1 and ratio_rcp == 1:
@@ -670,18 +735,17 @@ class TickerBase:
df_new["Volume"] *= ratio_rcp
# Repair!
bad_dts = df_block.index[(df_block[price_cols+["Volume"]]==tag).any(axis=1)]
bad_dts = df_block.index[(df_block[price_cols+["Volume"]]==tag).to_numpy().any(axis=1)]
if debug:
no_fine_data_dts = []
for idx in bad_dts:
if not idx in df_new.index:
# Yahoo didn't return finer-grain data for this interval,
# so probably no trading happened.
no_fine_data_dts.append(idx)
if len(no_fine_data_dts) > 0:
print(f"Yahoo didn't return finer-grain data for these intervals:")
print(no_fine_data_dts)
no_fine_data_dts = []
for idx in bad_dts:
if not idx in df_new.index:
# Yahoo didn't return finer-grain data for this interval,
# so probably no trading happened.
no_fine_data_dts.append(idx)
if len(no_fine_data_dts) > 0:
logger.debug(f"Yahoo didn't return finer-grain data for these intervals:")
logger.debug(no_fine_data_dts)
for idx in bad_dts:
if not idx in df_new.index:
# Yahoo didn't return finer-grain data for this interval,
@@ -694,7 +758,7 @@ class TickerBase:
df_fine = df_fine.loc[idx:]
df_bad_row = df.loc[idx]
bad_fields = df_bad_row.index[df_bad_row==tag].values
bad_fields = df_bad_row.index[df_bad_row==tag].to_numpy()
if "High" in bad_fields:
df_v2.loc[idx, "High"] = df_new_row["High"]
if "Low" in bad_fields:
@@ -712,10 +776,11 @@ class TickerBase:
df_v2.loc[idx, "Adj Close"] = df_new_row["Adj Close"]
if "Volume" in bad_fields:
df_v2.loc[idx, "Volume"] = df_new_row["Volume"]
df_v2.loc[idx, "Repaired?"] = True
n_fixed += 1
if debug:
print("df_v2:") ; print(df_v2)
logger.debug("df_v2:")
logger.debug(df_v2)
return df_v2
@@ -725,16 +790,21 @@ class TickerBase:
# Easy to detect and fix, just look for outliers = ~100x local median
if df.shape[0] == 0:
if not "Repaired?" in df.columns:
df["Repaired?"] = False
return df
if df.shape[0] == 1:
# Need multiple rows to confidently identify outliers
logger.warning("Cannot check single-row table for 100x price errors")
if not "Repaired?" in df.columns:
df["Repaired?"] = False
return df
df2 = df.copy()
if df.index.tz is None:
df2.index = df2.index.tz_localize(tz_exchange)
else:
elif df2.index.tz != tz_exchange:
df2.index = df2.index.tz_convert(tz_exchange)
# Only import scipy if users actually want function. To avoid
@@ -743,19 +813,26 @@ class TickerBase:
data_cols = ["High", "Open", "Low", "Close", "Adj Close"] # Order important, separate High from Low
data_cols = [c for c in data_cols if c in df2.columns]
f_zeroes = (df2[data_cols]==0).any(axis=1)
f_zeroes = (df2[data_cols]==0).any(axis=1).to_numpy()
if f_zeroes.any():
df2_zeroes = df2[f_zeroes]
df2 = df2[~f_zeroes]
else:
df2_zeroes = None
if df2.shape[0] <= 1:
logger.warning("Insufficient good data for detecting 100x price errors")
if not "Repaired?" in df.columns:
df["Repaired?"] = False
return df
median = _ndimage.median_filter(df2[data_cols].values, size=(3, 3), mode="wrap")
ratio = df2[data_cols].values / median
df2_data = df2[data_cols].to_numpy()
median = _ndimage.median_filter(df2_data, size=(3, 3), mode="wrap")
ratio = df2_data / median
ratio_rounded = (ratio / 20).round() * 20 # round ratio to nearest 20
f = ratio_rounded == 100
if not f.any():
logger.info("No bad data (100x wrong) to repair")
if not "Repaired?" in df.columns:
df["Repaired?"] = False
return df
# Mark values to send for repair
@@ -765,14 +842,15 @@ class TickerBase:
c = data_cols[i]
df2.loc[fi, c] = tag
n_before = (df2[data_cols].to_numpy()==tag).sum()
n_before = (df2_data==tag).sum()
df2 = self._reconstruct_intervals_batch(df2, interval, prepost, tag, silent)
df2_tagged = df2[data_cols].to_numpy()==tag
n_after = (df2[data_cols].to_numpy()==tag).sum()
if n_after > 0:
# This second pass will *crudely* "fix" any remaining errors in High/Low
# simply by ensuring they don't contradict e.g. Low = 100x High.
f = df2[data_cols].to_numpy()==tag
f = df2_tagged
for i in range(f.shape[0]):
fi = f[i,:]
if not fi.any():
@@ -804,7 +882,10 @@ class TickerBase:
if fi[j]:
df2.loc[idx, c] = df2.loc[idx, ["Open", "Close"]].min()
n_after_crude = (df2[data_cols].to_numpy()==tag).sum()
df2_tagged = df2[data_cols].to_numpy()==tag
n_after_crude = df2_tagged.sum()
else:
n_after_crude = n_after
n_fixed = n_before - n_after_crude
n_fixed_crudely = n_after - n_after_crude
@@ -813,16 +894,18 @@ class TickerBase:
if n_fixed_crudely > 0:
report_msg += f"({n_fixed_crudely} crudely) "
report_msg += f"in {interval} price data"
print(report_msg)
logger.info('%s', report_msg)
# Restore original values where repair failed
f = df2[data_cols].values==tag
f = df2_tagged
for j in range(len(data_cols)):
fj = f[:,j]
if fj.any():
c = data_cols[j]
df2.loc[fj, c] = df.loc[fj, c]
if df2_zeroes is not None:
if not "Repaired?" in df2_zeroes.columns:
df2_zeroes["Repaired?"] = False
df2 = _pd.concat([df2, df2_zeroes]).sort_index()
df2.index = _pd.to_datetime(df2.index)
@@ -834,11 +917,10 @@ class TickerBase:
# Impossible to distinguish, so only attempt repair if few or rare.
if df.shape[0] == 0:
if not "Repaired?" in df.columns:
df["Repaired?"] = False
return df
debug = False
# debug = True
intraday = interval[-1] in ("m", 'h')
df = df.sort_index() # important!
@@ -846,7 +928,7 @@ class TickerBase:
if df2.index.tz is None:
df2.index = df2.index.tz_localize(tz_exchange)
else:
elif df2.index.tz != tz_exchange:
df2.index = df2.index.tz_convert(tz_exchange)
price_cols = [c for c in ["Open", "High", "Low", "Close", "Adj Close"] if c in df2.columns]
@@ -854,30 +936,31 @@ class TickerBase:
df2_reserve = None
if intraday:
# Ignore days with >50% intervals containing NaNs
df_nans = pd.DataFrame(f_prices_bad.any(axis=1), columns=["nan"])
df_nans["_date"] = df_nans.index.date
grp = df_nans.groupby("_date")
grp = pd.Series(f_prices_bad.any(axis=1), name="nan").groupby(f_prices_bad.index.date)
nan_pct = grp.sum() / grp.count()
dts = nan_pct.index[nan_pct["nan"]>0.5]
dts = nan_pct.index[nan_pct>0.5]
f_zero_or_nan_ignore = _np.isin(f_prices_bad.index.date, dts)
df2_reserve = df2[f_zero_or_nan_ignore]
df2 = df2[~f_zero_or_nan_ignore]
f_prices_bad = (df2[price_cols] == 0.0) | df2[price_cols].isna()
f_high_low_good = (~df2["High"].isna()) & (~df2["Low"].isna())
f_vol_bad = (df2["Volume"]==0).to_numpy() & f_high_low_good & (df2["High"]!=df2["Low"]).to_numpy()
f_high_low_good = (~df2["High"].isna().to_numpy()) & (~df2["Low"].isna().to_numpy())
f_change = df2["High"].to_numpy() != df2["Low"].to_numpy()
f_vol_bad = (df2["Volume"]==0).to_numpy() & f_high_low_good & f_change
# Check whether worth attempting repair
f_prices_bad = f_prices_bad.to_numpy()
f_bad_rows = f_prices_bad.any(axis=1) | f_vol_bad
if not f_bad_rows.any():
if debug:
print("no bad data to repair")
logger.info("No bad data (price=0) to repair")
if not "Repaired?" in df.columns:
df["Repaired?"] = False
return df
if f_prices_bad.sum() == len(price_cols)*len(df2):
# Need some good data to calibrate
if debug:
print("no good data to calibrate")
logger.warning("No good data for calibration so cannot fix price=0 bad data")
if not "Repaired?" in df.columns:
df["Repaired?"] = False
return df
data_cols = price_cols + ["Volume"]
@@ -892,37 +975,39 @@ class TickerBase:
f_vol_zero_or_nan = (df2["Volume"].to_numpy()==0) | (df2["Volume"].isna().to_numpy())
df2.loc[f_prices_bad.any(axis=1) & f_vol_zero_or_nan, "Volume"] = tag
# If volume=0 or NaN but price moved in interval, then tag volume for repair
f_change = df2["High"].to_numpy() != df2["Low"].to_numpy()
df2.loc[f_change & f_vol_zero_or_nan, "Volume"] = tag
n_before = (df2[data_cols].to_numpy()==tag).sum()
dts_tagged = df2.index[(df2[data_cols].to_numpy()==tag).any(axis=1)]
df2 = self._reconstruct_intervals_batch(df2, interval, prepost, tag, silent)
n_after = (df2[data_cols].to_numpy()==tag).sum()
dts_not_repaired = df2.index[(df2[data_cols].to_numpy()==tag).any(axis=1)]
df2_tagged = df2[data_cols].to_numpy()==tag
n_before = df2_tagged.sum()
dts_tagged = df2.index[df2_tagged.any(axis=1)]
df3 = self._reconstruct_intervals_batch(df2, interval, prepost, tag, silent)
df3_tagged = df3[data_cols].to_numpy()==tag
n_after = df3_tagged.sum()
dts_not_repaired = df3.index[df3_tagged.any(axis=1)]
n_fixed = n_before - n_after
if not silent and n_fixed > 0:
msg = f"{self.ticker}: fixed {n_fixed}/{n_before} value=0 errors in {interval} price data"
if n_fixed < 4:
dts_repaired = sorted(list(set(dts_tagged).difference(dts_not_repaired)))
msg += f": {dts_repaired}"
print(msg)
logger.info('%s', msg)
if df2_reserve is not None:
df2 = _pd.concat([df2, df2_reserve])
df2 = df2.sort_index()
if not "Repaired?" in df2_reserve.columns:
df2_reserve["Repaired?"] = False
df3 = _pd.concat([df3, df2_reserve]).sort_index()
# Restore original values where repair failed (i.e. remove tag values)
f = df2[data_cols].values==tag
f = df3[data_cols].to_numpy()==tag
for j in range(len(data_cols)):
fj = f[:,j]
if fj.any():
c = data_cols[j]
df2.loc[fj, c] = df.loc[fj, c]
df3.loc[fj, c] = df.loc[fj, c]
return df2
return df3
def _get_ticker_tz(self, debug_mode, proxy, timeout):
def _get_ticker_tz(self, proxy, timeout):
if self._tz is not None:
return self._tz
cache = utils.get_tz_cache()
@@ -934,7 +1019,7 @@ class TickerBase:
tz = None
if tz is None:
tz = self._fetch_ticker_tz(debug_mode, proxy, timeout)
tz = self._fetch_ticker_tz(proxy, timeout)
if utils.is_valid_timezone(tz):
# info fetch is relatively slow so cache timezone
@@ -945,7 +1030,7 @@ class TickerBase:
self._tz = tz
return tz
def _fetch_ticker_tz(self, debug_mode, proxy, timeout):
def _fetch_ticker_tz(self, proxy, timeout):
# Query Yahoo for fast price data just to get returned timezone
params = {"range": "1d", "interval": "1d"}
@@ -957,25 +1042,22 @@ class TickerBase:
data = self._data.cache_get(url=url, params=params, proxy=proxy, timeout=timeout)
data = data.json()
except Exception as e:
if debug_mode:
print("Failed to get ticker '{}' reason: {}".format(self.ticker, e))
logger.error("Failed to get ticker '{}' reason: {}".format(self.ticker, e))
return None
else:
error = data.get('chart', {}).get('error', None)
if error:
# explicit error from yahoo API
if debug_mode:
print("Got error from yahoo api for ticker {}, Error: {}".format(self.ticker, error))
logger.debug("Got error from yahoo api for ticker {}, Error: {}".format(self.ticker, error))
else:
try:
return data["chart"]["result"][0]["meta"]["exchangeTimezoneName"]
except Exception as err:
if debug_mode:
print("Could not get exchangeTimezoneName for ticker '{}' reason: {}".format(self.ticker, err))
print("Got response: ")
print("-------------")
print(" {}".format(data))
print("-------------")
logger.error("Could not get exchangeTimezoneName for ticker '{}' reason: {}".format(self.ticker, err))
logger.debug("Got response: ")
logger.debug("-------------")
logger.debug(" {}".format(data))
logger.debug("-------------")
return None
def get_recommendations(self, proxy=None, as_dict=False):
@@ -1022,11 +1104,13 @@ class TickerBase:
@property
def fast_info(self):
if self._fast_info is None:
self._fast_info = FastInfo(self)
return self._fast_info
@property
def basic_info(self):
print("WARNING: 'Ticker.basic_info' is renamed to 'Ticker.fast_info', hopefully purpose is clearer")
warnings.warn("'Ticker.basic_info' is renamed to 'Ticker.fast_info', hopefully purpose is clearer", DeprecationWarning)
return self.fast_info
def get_sustainability(self, proxy=None, as_dict=False):
@@ -1255,7 +1339,7 @@ class TickerBase:
def get_shares_full(self, start=None, end=None, proxy=None):
# Process dates
tz = self._get_ticker_tz(debug_mode=False, proxy=None, timeout=10)
tz = self._get_ticker_tz(proxy=None, timeout=10)
dt_now = _pd.Timestamp.utcnow().tz_convert(tz)
if start is not None:
start_ts = utils._parse_user_dt(start, tz)
@@ -1270,7 +1354,7 @@ class TickerBase:
if start is None:
start = end - _pd.Timedelta(days=548) # 18 months
if start >= end:
print("ERROR: start date must be before end")
logger.error("Start date must be before end")
return None
start = start.floor("D")
end = end.ceil("D")
@@ -1282,14 +1366,14 @@ class TickerBase:
json_str = self._data.cache_get(shares_url).text
json_data = _json.loads(json_str)
except:
print(f"{self.ticker}: Yahoo web request for share count failed")
logger.error("%s: Yahoo web request for share count failed", self.ticker)
return None
try:
fail = json_data["finance"]["error"]["code"] == "Bad Request"
except:
fail = False
if fail:
print(f"{self.ticker}: Yahoo web request for share count failed")
logger.error(f"%s: Yahoo web request for share count failed", self.ticker)
return None
shares_data = json_data["timeseries"]["result"]
@@ -1298,7 +1382,7 @@ class TickerBase:
try:
df = _pd.Series(shares_data[0]["shares_out"], index=_pd.to_datetime(shares_data[0]["timestamp"], unit="s"))
except Exception as e:
print(f"{self.ticker}: Failed to parse shares count data: "+str(e))
logger.error(f"%s: Failed to parse shares count data: %s", self.ticker, e)
return None
df.index = df.index.tz_localize(tz)
@@ -1413,7 +1497,7 @@ class TickerBase:
if dates is None or dates.shape[0] == 0:
err_msg = "No earnings dates found, symbol may be delisted"
print('- %s: %s' % (self.ticker, err_msg))
logger.error('%s: %s', self.ticker, err_msg)
return None
dates = dates.reset_index(drop=True)
@@ -1441,7 +1525,7 @@ class TickerBase:
dates[cn] = _pd.to_datetime(dates[cn], format="%b %d, %Y, %I %p")
# - instead of attempting decoding of ambiguous timezone abbreviation, just use 'info':
self._quote.proxy = proxy
tz = self._get_ticker_tz(debug_mode=False, proxy=proxy, timeout=30)
tz = self._get_ticker_tz(proxy=proxy, timeout=30)
dates[cn] = dates[cn].dt.tz_localize(tz)
dates = dates.set_index("Earnings Date")
@@ -1454,4 +1538,9 @@ class TickerBase:
if self._history_metadata is None:
# Request intraday data, because then Yahoo returns exchange schedule.
self.history(period="1wk", interval="1h", prepost=True)
if self._history_metadata_formatted is False:
self._history_metadata = utils.format_history_metadata(self._history_metadata)
self._history_metadata_formatted = True
return self._history_metadata

View File

@@ -1,6 +1,7 @@
import functools
from functools import lru_cache
import logging
import hashlib
from base64 import b64decode
usePycryptodome = False # slightly faster
@@ -25,8 +26,12 @@ try:
except ImportError:
import json as json
from . import utils
cache_maxsize = 64
logger = utils.get_yf_logger()
def lru_cache_freezeargs(func):
"""
@@ -297,11 +302,11 @@ class TickerData:
# Gather decryption keys:
soup = BeautifulSoup(response.content, "html.parser")
keys = self._get_decryption_keys_from_yahoo_js(soup)
# if len(keys) == 0:
# msg = "No decryption keys could be extracted from JS file."
# if "requests_cache" in str(type(response)):
# msg += " Try flushing your 'requests_cache', probably parsing old JS."
# print("WARNING: " + msg + " Falling back to backup decrypt methods.")
if len(keys) == 0:
msg = "No decryption keys could be extracted from JS file."
if "requests_cache" in str(type(response)):
msg += " Try flushing your 'requests_cache', probably parsing old JS."
logger.warning("%s Falling back to backup decrypt methods.", msg)
if len(keys) == 0:
keys = []
try:

View File

@@ -21,6 +21,8 @@
from __future__ import print_function
import logging
import traceback
import time as _time
import multitasking as _multitasking
import pandas as _pd
@@ -28,11 +30,10 @@ import pandas as _pd
from . import Ticker, utils
from . import shared
def download(tickers, start=None, end=None, actions=False, threads=True, ignore_tz=None,
group_by='column', auto_adjust=False, back_adjust=False, repair=False, keepna=False,
progress=True, period="max", show_errors=True, interval="1d", prepost=False,
proxy=None, rounding=False, timeout=10):
progress=True, period="max", show_errors=None, interval="1d", prepost=False,
proxy=None, rounding=False, timeout=10, session=None):
"""Download yahoo tickers
:Parameters:
tickers : str, list
@@ -77,11 +78,22 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
Optional. Round values to 2 decimal places?
show_errors: bool
Optional. Doesn't print errors if False
DEPRECATED, will be removed in future version
timeout: None or float
If not None stops waiting for a response after given number of
seconds. (Can also be a fraction of a second e.g. 0.01)
session: None or Session
Optional. Pass your own session object to be used for all requests
"""
if show_errors is not None:
if show_errors:
utils.print_once(f"yfinance: download(show_errors={show_errors}) argument is deprecated and will be removed in future version. Do this instead: logging.getLogger('yfinance').setLevel(logging.ERROR)")
logging.getLogger('yfinance').setLevel(logging.ERROR)
else:
utils.print_once(f"yfinance: download(show_errors={show_errors}) argument is deprecated and will be removed in future version. Do this instead to suppress error messages: logging.getLogger('yfinance').setLevel(logging.CRITICAL)")
logging.getLogger('yfinance').setLevel(logging.CRITICAL)
if ignore_tz is None:
# Set default value depending on interval
if interval[1:] in ['m', 'h']:
@@ -100,7 +112,7 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
for ticker in tickers:
if utils.is_isin(ticker):
isin = ticker
ticker = utils.get_ticker_by_isin(ticker, proxy)
ticker = utils.get_ticker_by_isin(ticker, proxy, session=session)
shared._ISINS[ticker] = isin
_tickers_.append(ticker)
@@ -114,6 +126,7 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
# reset shared._DFS
shared._DFS = {}
shared._ERRORS = {}
shared._TRACEBACKS = {}
# download using threads
if threads:
@@ -126,10 +139,9 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
actions=actions, auto_adjust=auto_adjust,
back_adjust=back_adjust, repair=repair, keepna=keepna,
progress=(progress and i > 0), proxy=proxy,
rounding=rounding, timeout=timeout)
rounding=rounding, timeout=timeout, session=session)
while len(shared._DFS) < len(tickers):
_time.sleep(0.01)
# download synchronously
else:
for i, ticker in enumerate(tickers):
@@ -138,20 +150,40 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
actions=actions, auto_adjust=auto_adjust,
back_adjust=back_adjust, repair=repair, keepna=keepna,
proxy=proxy,
rounding=rounding, timeout=timeout)
shared._DFS[ticker.upper()] = data
rounding=rounding, timeout=timeout, session=session)
if progress:
shared._PROGRESS_BAR.animate()
if progress:
shared._PROGRESS_BAR.completed()
if shared._ERRORS and show_errors:
print('\n%.f Failed download%s:' % (
if shared._ERRORS:
# Send errors to logging module
logger = utils.get_yf_logger()
logger.error('\n%.f Failed download%s:' % (
len(shared._ERRORS), 's' if len(shared._ERRORS) > 1 else ''))
# print(shared._ERRORS)
print("\n".join(['- %s: %s' %
v for v in list(shared._ERRORS.items())]))
# Log each distinct error once, with list of symbols affected
errors = {}
for ticker in shared._ERRORS:
err = shared._ERRORS[ticker]
if err not in errors:
errors[err] = [ticker]
else:
errors[err].append(ticker)
for err in errors.keys():
logger.error(f'{errors[err]}: {err}')
# Log each distinct traceback once, with list of symbols affected
tbs = {}
for ticker in shared._TRACEBACKS:
tb = shared._TRACEBACKS[ticker]
if tb not in tbs:
tbs[tb] = [ticker]
else:
tbs[tb].append(ticker)
for tb in tbs.keys():
logger.debug(f'{tbs[tb]}: {tb}')
if ignore_tz:
for tkr in shared._DFS.keys():
@@ -208,17 +240,10 @@ def _download_one_threaded(ticker, start=None, end=None,
auto_adjust=False, back_adjust=False, repair=False,
actions=False, progress=True, period="max",
interval="1d", prepost=False, proxy=None,
keepna=False, rounding=False, timeout=10):
try:
data = _download_one(ticker, start, end, auto_adjust, back_adjust, repair,
actions, period, interval, prepost, proxy, rounding,
keepna, timeout)
except Exception as e:
# global try/except needed as the current thread implementation breaks if an exception is raised.
shared._DFS[ticker] = utils.empty_df()
shared._ERRORS[ticker] = repr(e)
else:
shared._DFS[ticker.upper()] = data
keepna=False, rounding=False, timeout=10, session=None):
data = _download_one(ticker, start, end, auto_adjust, back_adjust, repair,
actions, period, interval, prepost, proxy, rounding,
keepna, timeout, session)
if progress:
shared._PROGRESS_BAR.animate()
@@ -227,12 +252,23 @@ def _download_one(ticker, start=None, end=None,
auto_adjust=False, back_adjust=False, repair=False,
actions=False, period="max", interval="1d",
prepost=False, proxy=None, rounding=False,
keepna=False, timeout=10):
return Ticker(ticker).history(
period=period, interval=interval,
start=start, end=end, prepost=prepost,
actions=actions, auto_adjust=auto_adjust,
back_adjust=back_adjust, repair=repair, proxy=proxy,
rounding=rounding, keepna=keepna, timeout=timeout,
debug=False, raise_errors=False # debug and raise_errors false to not log and raise errors in threads
)
keepna=False, timeout=10, session=None):
data = None
try:
data = Ticker(ticker, session=session).history(
period=period, interval=interval,
start=start, end=end, prepost=prepost,
actions=actions, auto_adjust=auto_adjust,
back_adjust=back_adjust, repair=repair, proxy=proxy,
rounding=rounding, keepna=keepna, timeout=timeout,
raise_errors=True
)
except Exception as e:
# global try/except needed as the current thread implementation breaks if an exception is raised.
shared._DFS[ticker.upper()] = utils.empty_df()
shared._ERRORS[ticker.upper()] = repr(e)
shared._TRACEBACKS[ticker.upper()] = traceback.format_exc()
else:
shared._DFS[ticker.upper()] = data
return data
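Taken together, the `download` changes here replace `show_errors` printing with the standard `logging` module, thread the new `session` argument through to every `Ticker.history()` call, and record one traceback per failing symbol. A minimal sketch of the resulting calling pattern (symbols and period are illustrative):

```python
import logging
import requests
import yfinance as yf

# show_errors is deprecated; tune the 'yfinance' logger instead:
logging.getLogger('yfinance').setLevel(logging.ERROR)   # errors only
# ...or logging.DEBUG to also see the per-ticker tracebacks stored above.

# The new session argument reuses one connection pool for all requests:
session = requests.Session()
df = yf.download(["MSFT", "AAPL"], period="1mo", session=session)
```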

@@ -58,7 +58,7 @@ class Analysis:
analysis_data = analysis_data['QuoteSummaryStore']
except KeyError as e:
err_msg = "No analysis data found, symbol may be delisted"
print('- %s: %s' % (self._data.ticker, err_msg))
logger.error('%s: %s', self._data.ticker, err_msg)
return
if isinstance(analysis_data.get('earningsTrend'), dict):
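The `print` call becomes `logger.error` with %-style arguments, so interpolation is deferred until a handler actually emits the record. A small sketch of the difference, assuming the shared 'yfinance' logger:

```python
import logging

logger = logging.getLogger("yfinance")
ticker, err_msg = "MSFT", "No analysis data found, symbol may be delisted"

# Eager: the message string is built even if ERROR records are filtered out.
logger.error('- %s: %s' % (ticker, err_msg))
# Lazy: arguments are only interpolated when the record is emitted.
logger.error('%s: %s', ticker, err_msg)
```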

@@ -1,4 +1,5 @@
import datetime
import logging
import json
import pandas as pd
@@ -8,6 +9,7 @@ from yfinance import utils
from yfinance.data import TickerData
from yfinance.exceptions import YFinanceDataException, YFinanceException
logger = utils.get_yf_logger()
class Fundamentals:
@@ -50,7 +52,7 @@ class Fundamentals:
self._fin_data_quote = self._financials_data['QuoteSummaryStore']
except KeyError:
err_msg = "No financials data found, symbol may be delisted"
print('- %s: %s' % (self._data.ticker, err_msg))
logger.error('%s: %s', self._data.ticker, err_msg)
return None
def _scrape_earnings(self, proxy):
@@ -144,7 +146,7 @@ class Financials:
if statement is not None:
return statement
except YFinanceException as e:
print(f"- {self._data.ticker}: Failed to create {name} financials table for reason: {repr(e)}")
logger.error("%s: Failed to create %s financials table for reason: %r", self._data.ticker, name, e)
return pd.DataFrame()
def _create_financials_table(self, name, timescale, proxy):
@@ -267,7 +269,7 @@ class Financials:
if statement is not None:
return statement
except YFinanceException as e:
print(f"- {self._data.ticker}: Failed to create financials table for {name} reason: {repr(e)}")
logger.error("%s: Failed to create financials table for %s reason: %r", self._data.ticker, name, e)
return pd.DataFrame()
def _create_financials_table_old(self, name, timescale, proxy):

@@ -1,11 +1,15 @@
import datetime
import logging
import json
import warnings
import pandas as pd
import numpy as _np
from yfinance import utils
from yfinance.data import TickerData
logger = utils.get_yf_logger()
info_retired_keys_price = {"currentPrice", "dayHigh", "dayLow", "open", "previousClose", "volume", "volume24Hr"}
info_retired_keys_price.update({"regularMarket"+s for s in ["DayHigh", "DayLow", "Open", "PreviousClose", "Price", "Volume"]})
@@ -45,16 +49,16 @@ class InfoDictWrapper(MutableMapping):
def __getitem__(self, k):
if k in info_retired_keys_price:
print(f"Price data removed from info (key='{k}'). Use Ticker.fast_info or history() instead")
warnings.warn(f"Price data removed from info (key='{k}'). Use Ticker.fast_info or history() instead", DeprecationWarning)
return None
elif k in info_retired_keys_exchange:
print(f"Exchange data removed from info (key='{k}'). Use Ticker.fast_info or Ticker.get_history_metadata() instead")
warnings.warn(f"Exchange data removed from info (key='{k}'). Use Ticker.fast_info or Ticker.get_history_metadata() instead", DeprecationWarning)
return None
elif k in info_retired_keys_marketCap:
print(f"Market cap removed from info (key='{k}'). Use Ticker.fast_info instead")
warnings.warn(f"Market cap removed from info (key='{k}'). Use Ticker.fast_info instead", DeprecationWarning)
return None
elif k in info_retired_keys_symbol:
print(f"Symbol removed from info (key='{k}'). You know this already")
warnings.warn(f"Symbol removed from info (key='{k}'). You know this already", DeprecationWarning)
return None
return self.info[self._keytransform(k)]
@@ -78,7 +82,7 @@ class FastInfo:
# Contain small subset of info[] items that can be fetched faster elsewhere.
# Imitates a dict.
def __init__(self, tickerBaseObject):
utils.print_once("Note: 'info' dict is now fixed & improved, 'fast_info' no longer faster")
utils.print_once("yfinance: Note: 'Ticker.info' dict is now fixed & improved, 'fast_info' is no longer faster")
self._tkr = tickerBaseObject
@@ -174,7 +178,11 @@ class FastInfo:
def _get_1y_prices(self, fullDaysOnly=False):
if self._prices_1y is None:
self._prices_1y = self._tkr.history(period="380d", auto_adjust=False, debug=False, keepna=True)
# Temporarily disable error printing
l = logger.level
logger.setLevel(logging.CRITICAL)
self._prices_1y = self._tkr.history(period="380d", auto_adjust=False, keepna=True)
logger.setLevel(l)
self._md = self._tkr.get_history_metadata()
try:
ctp = self._md["currentTradingPeriod"]
@@ -200,12 +208,20 @@ class FastInfo:
def _get_1wk_1h_prepost_prices(self):
if self._prices_1wk_1h_prepost is None:
self._prices_1wk_1h_prepost = self._tkr.history(period="1wk", interval="1h", auto_adjust=False, prepost=True, debug=False)
# Temporarily disable error printing
l = logger.level
logger.setLevel(logging.CRITICAL)
self._prices_1wk_1h_prepost = self._tkr.history(period="1wk", interval="1h", auto_adjust=False, prepost=True)
logger.setLevel(l)
return self._prices_1wk_1h_prepost
def _get_1wk_1h_reg_prices(self):
if self._prices_1wk_1h_reg is None:
self._prices_1wk_1h_reg = self._tkr.history(period="1wk", interval="1h", auto_adjust=False, prepost=False, debug=False)
# Temporarily disable error printing
l = logger.level
logger.setLevel(logging.CRITICAL)
self._prices_1wk_1h_reg = self._tkr.history(period="1wk", interval="1h", auto_adjust=False, prepost=False)
logger.setLevel(l)
return self._prices_1wk_1h_reg
def _get_exchange_metadata(self):
@@ -515,6 +531,8 @@ class FastInfo:
except Exception as e:
if "Cannot retrieve share count" in str(e):
shares = None
elif "failed to decrypt Yahoo" in str(e):
shares = None
else:
raise
@@ -586,7 +604,7 @@ class Quote:
quote_summary_store = json_data['QuoteSummaryStore']
except KeyError:
err_msg = "No summary info found, symbol may be delisted"
print('- %s: %s' % (self._data.ticker, err_msg))
logger.error('%s: %s', self._data.ticker, err_msg)
return None
# sustainability
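`FastInfo` now silences expected fetch errors by raising the logger level around each `history()` call, replacing the removed `debug=False` flag. One way to package that save/raise/restore pattern is a small context manager; this is a sketch, not part of the diff:

```python
import logging
from contextlib import contextmanager

@contextmanager
def quiet_logger(logger, level=logging.CRITICAL):
    # Temporarily raise the level; restore the original even if an error escapes.
    old_level = logger.level
    logger.setLevel(level)
    try:
        yield
    finally:
        logger.setLevel(old_level)

# with quiet_logger(logging.getLogger("yfinance")):
#     prices = tkr.history(period="380d", auto_adjust=False, keepna=True)
```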

@@ -22,4 +22,5 @@
_DFS = {}
_PROGRESS_BAR = None
_ERRORS = {}
_TRACEBACKS = {}
_ISINS = {}

@@ -87,10 +87,4 @@ class Tickers:
return data
def news(self):
collection = {}
for ticker in self.symbols:
collection[ticker] = []
items = Ticker(ticker).news
for item in items:
collection[ticker].append(item)
return collection
return {ticker: list(Ticker(ticker).news) for ticker in self.symbols}
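Behaviour is unchanged: `news()` still returns one list of story dicts per symbol, now built by a dict comprehension. Usage sketch (symbols illustrative):

```python
import yfinance as yf

news = yf.Tickers("MSFT AAPL").news()
# {'MSFT': [{...}, ...], 'AAPL': [{...}, ...]}
```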

@@ -36,6 +36,7 @@ import appdirs as _ad
import sqlite3 as _sqlite3
import atexit as _atexit
from functools import lru_cache
import logging
from threading import Lock
@@ -69,6 +70,20 @@ def print_once(msg):
print(msg)
yf_logger = None
def get_yf_logger():
global yf_logger
if yf_logger is None:
yf_logger = logging.getLogger("yfinance")
if not yf_logger.handlers:
# Add a stream handler if the user hasn't already added one
h = logging.StreamHandler()
formatter = logging.Formatter(fmt='%(levelname)s %(message)s')
h.setFormatter(formatter)
yf_logger.addHandler(h)
return yf_logger
def is_isin(string):
return bool(_re.match("^([A-Z]{2})([A-Z0-9]{9})([0-9]{1})$", string))
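`get_yf_logger()` lazily creates the package-wide 'yfinance' logger and only attaches a default `StreamHandler` when none exists, so application-level configuration wins. For example, an application can claim the logger before the first fetch (the handler and format choices below are assumptions, not part of the diff):

```python
import logging

yf_log = logging.getLogger("yfinance")
yf_log.setLevel(logging.DEBUG)  # DEBUG also surfaces download() tracebacks
handler = logging.FileHandler("yfinance.log")  # illustrative destination
handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
yf_log.addHandler(handler)  # now get_yf_logger() will skip its default handler
```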
@@ -346,10 +361,10 @@ def _interval_to_timedelta(interval):
def auto_adjust(data):
col_order = data.columns
df = data.copy()
ratio = df["Close"] / df["Adj Close"]
df["Adj Open"] = df["Open"] / ratio
df["Adj High"] = df["High"] / ratio
df["Adj Low"] = df["Low"] / ratio
ratio = (df["Adj Close"] / df["Close"]).to_numpy()
df["Adj Open"] = df["Open"] * ratio
df["Adj High"] = df["High"] * ratio
df["Adj Low"] = df["Low"] * ratio
df.drop(
["Open", "High", "Low", "Close"],
@@ -412,12 +427,9 @@ def parse_quotes(data):
def parse_actions(data):
dividends = _pd.DataFrame(
columns=["Dividends"], index=_pd.DatetimeIndex([]))
capital_gains = _pd.DataFrame(
columns=["Capital Gains"], index=_pd.DatetimeIndex([]))
splits = _pd.DataFrame(
columns=["Stock Splits"], index=_pd.DatetimeIndex([]))
dividends = None
capital_gains = None
splits = None
if "events" in data:
if "dividends" in data["events"]:
@@ -446,6 +458,16 @@ def parse_actions(data):
splits["denominator"]
splits = splits[["Stock Splits"]]
if dividends is None:
dividends = _pd.DataFrame(
columns=["Dividends"], index=_pd.DatetimeIndex([]))
if capital_gains is None:
capital_gains = _pd.DataFrame(
columns=["Capital Gains"], index=_pd.DatetimeIndex([]))
if splits is None:
splits = _pd.DataFrame(
columns=["Stock Splits"], index=_pd.DatetimeIndex([]))
return dividends, splits, capital_gains
@@ -456,31 +478,30 @@ def set_df_tz(df, interval, tz):
return df
def fix_Yahoo_returning_prepost_unrequested(quotes, interval, metadata):
def fix_Yahoo_returning_prepost_unrequested(quotes, interval, tradingPeriods):
# Sometimes Yahoo returns post-market data despite not requesting it.
# Normally happens on half-day early closes.
#
# And sometimes returns pre-market data despite not requesting it.
# E.g. some London tickers.
tps_df = metadata["tradingPeriods"]
tps_df = tradingPeriods.copy()
tps_df["_date"] = tps_df.index.date
quotes["_date"] = quotes.index.date
idx = quotes.index.copy()
quotes = quotes.merge(tps_df, how="left", validate="many_to_one")
quotes = quotes.merge(tps_df, how="left")
quotes.index = idx
# "end" = end of regular trading hours (including any auction)
f_drop = quotes.index >= quotes["end"]
f_drop = f_drop | (quotes.index < quotes["start"])
if f_drop.any():
# When printing report, ignore rows that were already NaNs:
f_na = quotes[["Open","Close"]].isna().all(axis=1)
n_nna = quotes.shape[0] - _np.sum(f_na)
n_drop_nna = _np.sum(f_drop & ~f_na)
quotes_dropped = quotes[f_drop]
# f_na = quotes[["Open","Close"]].isna().all(axis=1)
# n_nna = quotes.shape[0] - _np.sum(f_na)
# n_drop_nna = _np.sum(f_drop & ~f_na)
# quotes_dropped = quotes[f_drop]
# if debug and n_drop_nna > 0:
# print(f"Dropping {n_drop_nna}/{n_nna} intervals for falling outside regular trading hours")
quotes = quotes[~f_drop]
metadata["tradingPeriods"] = tps_df.drop(["_date"], axis=1)
quotes = quotes.drop(["_date", "start", "end"], axis=1)
return quotes
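The dropped rows are exactly those outside the per-day `[start, end)` regular session taken from the `tradingPeriods` table (after merging it onto the quotes by date). The predicate in single-day miniature:

```python
import pandas as pd

idx = pd.to_datetime(["2023-05-01 09:30", "2023-05-01 15:30", "2023-05-01 16:30"])
quotes = pd.DataFrame({"Close": [10.0, 10.5, 10.6]}, index=idx)
start, end = pd.Timestamp("2023-05-01 09:30"), pd.Timestamp("2023-05-01 16:00")

f_drop = (quotes.index >= end) | (quotes.index < start)
quotes = quotes[~f_drop]   # the unrequested 16:30 post-market row is gone
```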
@@ -519,16 +540,24 @@ def fix_Yahoo_returning_live_separate(quotes, interval, tz_exchange):
# Last two rows are within same interval
idx1 = quotes.index[n - 1]
idx2 = quotes.index[n - 2]
if idx1 == idx2:
# Yahoo returning last interval duplicated, which means
# Yahoo is not returning live data (phew!)
return quotes
if _np.isnan(quotes.loc[idx2, "Open"]):
quotes.loc[idx2, "Open"] = quotes["Open"][n - 1]
# Note: nanmax() & nanmin() ignores NaNs
quotes.loc[idx2, "High"] = _np.nanmax([quotes["High"][n - 1], quotes["High"][n - 2]])
quotes.loc[idx2, "Low"] = _np.nanmin([quotes["Low"][n - 1], quotes["Low"][n - 2]])
# Note: nanmax() & nanmin() ignore NaNs, but we still need to check that not all values are NaN to avoid warnings
if not _np.isnan(quotes["High"][n - 1]):
quotes.loc[idx2, "High"] = _np.nanmax([quotes["High"][n - 1], quotes["High"][n - 2]])
if "Adj High" in quotes.columns:
quotes.loc[idx2, "Adj High"] = _np.nanmax([quotes["Adj High"][n - 1], quotes["Adj High"][n - 2]])
if not _np.isnan(quotes["Low"][n - 1]):
quotes.loc[idx2, "Low"] = _np.nanmin([quotes["Low"][n - 1], quotes["Low"][n - 2]])
if "Adj Low" in quotes.columns:
quotes.loc[idx2, "Adj Low"] = _np.nanmin([quotes["Adj Low"][n - 1], quotes["Adj Low"][n - 2]])
quotes.loc[idx2, "Close"] = quotes["Close"][n - 1]
if "Adj High" in quotes.columns:
quotes.loc[idx2, "Adj High"] = _np.nanmax([quotes["Adj High"][n - 1], quotes["Adj High"][n - 2]])
if "Adj Low" in quotes.columns:
quotes.loc[idx2, "Adj Low"] = _np.nanmin([quotes["Adj Low"][n - 1], quotes["Adj Low"][n - 2]])
if "Adj Close" in quotes.columns:
quotes.loc[idx2, "Adj Close"] = quotes["Adj Close"][n - 1]
quotes.loc[idx2, "Volume"] += quotes["Volume"][n - 1]
@@ -698,7 +727,7 @@ def is_valid_timezone(tz: str) -> bool:
return True
def format_history_metadata(md):
def format_history_metadata(md, tradingPeriodsOnly=True):
if not isinstance(md, dict):
return md
if len(md) == 0:
@@ -706,60 +735,54 @@ def format_history_metadata(md):
tz = md["exchangeTimezoneName"]
for k in ["firstTradeDate", "regularMarketTime"]:
if k in md and md[k] is not None:
md[k] = _pd.to_datetime(md[k], unit='s', utc=True).tz_convert(tz)
if not tradingPeriodsOnly:
for k in ["firstTradeDate", "regularMarketTime"]:
if k in md and md[k] is not None:
if isinstance(md[k], int):
md[k] = _pd.to_datetime(md[k], unit='s', utc=True).tz_convert(tz)
if "currentTradingPeriod" in md:
for m in ["regular", "pre", "post"]:
if m in md["currentTradingPeriod"]:
for t in ["start", "end"]:
md["currentTradingPeriod"][m][t] = \
_pd.to_datetime(md["currentTradingPeriod"][m][t], unit='s', utc=True).tz_convert(tz)
del md["currentTradingPeriod"][m]["gmtoffset"]
del md["currentTradingPeriod"][m]["timezone"]
if "tradingPeriods" in md:
if md["tradingPeriods"] == {"pre":[], "post":[]}:
del md["tradingPeriods"]
if "currentTradingPeriod" in md:
for m in ["regular", "pre", "post"]:
if m in md["currentTradingPeriod"] and isinstance(md["currentTradingPeriod"][m]["start"], int):
for t in ["start", "end"]:
md["currentTradingPeriod"][m][t] = \
_pd.to_datetime(md["currentTradingPeriod"][m][t], unit='s', utc=True).tz_convert(tz)
del md["currentTradingPeriod"][m]["gmtoffset"]
del md["currentTradingPeriod"][m]["timezone"]
if "tradingPeriods" in md:
tps = md["tradingPeriods"]
if isinstance(tps, list):
# Only regular times
regs_dict = [tps[i][0] for i in range(len(tps))]
pres_dict = None
posts_dict = None
elif isinstance(tps, dict):
# Includes pre- and post-market
pres_dict = [tps["pre"][i][0] for i in range(len(tps["pre"]))]
posts_dict = [tps["post"][i][0] for i in range(len(tps["post"]))]
regs_dict = [tps["regular"][i][0] for i in range(len(tps["regular"]))]
else:
raise Exception()
if tps == {"pre":[], "post":[]}:
# Ignore
pass
elif isinstance(tps, (list, dict)):
if isinstance(tps, list):
# Only regular times
df = _pd.DataFrame.from_records(_np.hstack(tps))
df = df.drop(["timezone", "gmtoffset"], axis=1)
df["start"] = _pd.to_datetime(df["start"], unit='s', utc=True).dt.tz_convert(tz)
df["end"] = _pd.to_datetime(df["end"], unit='s', utc=True).dt.tz_convert(tz)
elif isinstance(tps, dict):
# Includes pre- and post-market
pre_df = _pd.DataFrame.from_records(_np.hstack(tps["pre"]))
post_df = _pd.DataFrame.from_records(_np.hstack(tps["post"]))
regular_df = _pd.DataFrame.from_records(_np.hstack(tps["regular"]))
pre_df = pre_df.rename(columns={"start":"pre_start", "end":"pre_end"}).drop(["timezone", "gmtoffset"], axis=1)
post_df = post_df.rename(columns={"start":"post_start", "end":"post_end"}).drop(["timezone", "gmtoffset"], axis=1)
regular_df = regular_df.drop(["timezone", "gmtoffset"], axis=1)
cols = ["pre_start", "pre_end", "start", "end", "post_start", "post_end"]
df = regular_df.join(pre_df).join(post_df)
for c in cols:
df[c] = _pd.to_datetime(df[c], unit='s', utc=True).dt.tz_convert(tz)
df = df[cols]
def _dict_to_table(d):
df = _pd.DataFrame.from_dict(d).drop(["timezone", "gmtoffset"], axis=1)
df["end"] = _pd.to_datetime(df["end"], unit='s', utc=True).dt.tz_convert(tz)
df["start"] = _pd.to_datetime(df["start"], unit='s', utc=True).dt.tz_convert(tz)
df.index = _pd.to_datetime(df["start"].dt.date)
df.index = df.index.tz_localize(tz)
return df
df.index.name = "Date"
df = _dict_to_table(regs_dict)
df_cols = ["start", "end"]
if pres_dict is not None:
pre_df = _dict_to_table(pres_dict)
df = df.merge(pre_df.rename(columns={"start":"pre_start", "end":"pre_end"}), left_index=True, right_index=True)
df_cols = ["pre_start", "pre_end"]+df_cols
if posts_dict is not None:
post_df = _dict_to_table(posts_dict)
df = df.merge(post_df.rename(columns={"start":"post_start", "end":"post_end"}), left_index=True, right_index=True)
df_cols = df_cols+["post_start", "post_end"]
df = df[df_cols]
df.index.name = "Date"
md["tradingPeriods"] = df
md["tradingPeriods"] = df
return md
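The rewrite builds `tradingPeriods` straight from Yahoo's nested records: flatten with `hstack`, drop the redundant `timezone`/`gmtoffset` columns, then convert the epoch-second `start`/`end` to exchange-local timestamps. In miniature, with one invented regular session:

```python
import numpy as np
import pandas as pd

tps = [[{"timezone": "EDT", "gmtoffset": -14400,
         "start": 1683034200, "end": 1683057600}]]   # 2023-05-02, 09:30-16:00 ET
df = pd.DataFrame.from_records(np.hstack(tps)).drop(["timezone", "gmtoffset"], axis=1)
for c in ("start", "end"):
    df[c] = pd.to_datetime(df[c], unit="s", utc=True).dt.tz_convert("America/New_York")
```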
@@ -844,14 +867,21 @@ class _KVStore:
def get(self, key: str) -> Union[str, None]:
"""Get value for key if it exists else returns None"""
item = self.conn.execute('select value from "kv" where key=?', (key,))
try:
item = self.conn.execute('select value from "kv" where key=?', (key,))
except _sqlite3.IntegrityError as e:
self.delete(key)
return None
if item:
return next(item, (None,))[0]
def set(self, key: str, value: str) -> None:
with self._cache_mutex:
self.conn.execute('replace into "kv" (key, value) values (?,?)', (key, value))
self.conn.commit()
if value is None:
self.delete(key)
else:
with self._cache_mutex:
self.conn.execute('replace into "kv" (key, value) values (?,?)', (key, value))
self.conn.commit()
def bulk_set(self, kvdata: Dict[str, str]):
records = tuple(i for i in kvdata.items())
@@ -917,11 +947,23 @@ class _TzCache:
if not _os.path.isfile(old_cache_file_path):
return None
try:
df = _pd.read_csv(old_cache_file_path, index_col="Ticker")
df = _pd.read_csv(old_cache_file_path, index_col="Ticker", on_bad_lines="skip")
except _pd.errors.EmptyDataError:
_os.remove(old_cache_file_path)
except TypeError:
_os.remove(old_cache_file_path)
else:
self.tz_db.bulk_set(df.to_dict()['Tz'])
# Discard corrupt data:
df = df[~df["Tz"].isna().to_numpy()]
df = df[~(df["Tz"]=='').to_numpy()]
df = df[~df.index.isna()]
if not df.empty:
try:
self.tz_db.bulk_set(df.to_dict()['Tz'])
except Exception as e:
# Ignore
pass
_os.remove(old_cache_file_path)
@@ -952,9 +994,10 @@ def get_tz_cache():
try:
_tz_cache = _TzCache()
except _TzCacheException as err:
print("Failed to create TzCache, reason: {}".format(err))
print("TzCache will not be used.")
print("Tip: You can direct cache to use a different location with 'set_tz_cache_location(mylocation)'")
logger.error("Failed to create TzCache, reason: %s. "
"TzCache will not be used. "
"Tip: You can direct cache to use a different location with 'set_tz_cache_location(mylocation)'",
err)
_tz_cache = _TzCacheDummy()
return _tz_cache
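The migration path now skips malformed CSV rows (`on_bad_lines="skip"`), discards empty or NaN entries before `bulk_set`, and downgrades the old prints to a single `logger.error` that keeps the tip about relocating the cache. That hook is the existing public `set_tz_cache_location`; for example:

```python
import yfinance as yf

# Point the timezone cache at a writable directory before the first download
# (the path here is illustrative).
yf.set_tz_cache_location("/tmp/yf-cache")
```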

@@ -1 +1 @@
version = "0.2.15"
version = "0.2.20"