Compare commits

..

17 Commits

Author SHA1 Message Date
Value Raider
c2d568367c Version 0.2.37 2024-02-25 13:25:28 +00:00
ValueRaider
d3728d3071 Merge pull request #1869 from ranaroussi/dev
Dev
2024-02-24 23:09:34 +00:00
ValueRaider
915bb1a080 Merge pull request #1866 from ranaroussi/fix/price-repair-confusing-order
Price repair bug-fix
2024-02-24 22:58:20 +00:00
Value Raider
d55c317158 Fix bug: prices order flipping during repair, introducing potential data corruption 2024-02-19 22:17:20 +00:00
ValueRaider
ac1d09049e Merge pull request #1865 from cottrell/fix
Fix some errors.
2024-02-19 22:11:48 +00:00
David Cottrell
afb4e0d5dc Fix some errors. 2024-02-19 21:43:12 +00:00
ValueRaider
1d31e7ca01 Update issue form - more emphasis on following instructions 2024-02-11 13:47:36 +00:00
ValueRaider
683064f9ad Merge pull request #1849 from ranaroussi/refactor/price-history 2024-02-07 23:11:51 +00:00
Value Raider
cdf897f9e6 Move price history+repair logic into new file 2024-02-04 13:09:37 +00:00
ValueRaider
eab6c8dfa7 Update bug_report.yaml because people can't read 2024-02-01 21:28:38 +00:00
ValueRaider
97f93d35ed Merge pull request #1844 from power-edge/dev
adding upgrade for pandas deprecation warning, adding pyarrow>=0.17.0…
2024-01-31 21:51:05 +00:00
Nikolaus Schuetz
5aef8addab removing dev requirements (they are included by extras) 2024-01-29 17:43:12 -05:00
ValueRaider
6b8a4a5608 Merge pull request #1841 from Rogach/pr/dont-disable-global-logging
do not disable app-wide logging in quote.py (fixes #1829)
2024-01-28 16:29:43 +00:00
Platon Pronko
212a7987c3 do not disable app-wide logging in quote.py (fixes #1829) 2024-01-28 20:43:50 +05:00
Nikolaus Schuetz
58a0a57457 adding upgrade for pandas deprecation warning, adding pyarrow>=0.17.0 at minimum requirement as defined in dev requirements for pandas==1.3.0 version 2024-01-26 20:12:58 -05:00
ValueRaider
75297c0eba Merge pull request #1838 from mreiche/bugfix/remove-empty-series
Remove _empty_series leftovers
2024-01-23 19:07:16 +00:00
Mike Reiche
1dc2719368 Remove _empty_series leftovers 2024-01-23 15:32:56 +01:00
13 changed files with 1721 additions and 1680 deletions

View File

@@ -6,23 +6,13 @@ body:
- type: markdown
attributes:
value: |
# IMPORTANT - Read and follow these instructions carefully. Help us help you.
### Does issue already exist?
Use the search tool. Don't annoy everyone by duplicating existing Issues.
# !!! IMPORTANT !!! FOLLOW THESE INSTRUCTIONS CAREFULLY !!!
### Are you up-to-date?
Upgrade to the latest version and confirm the issue/bug is still there.
Upgrade to the latest version: `$ pip install yfinance --upgrade --no-cache-dir`
`$ pip install yfinance --upgrade --no-cache-dir`
Confirm by running:
`import yfinance as yf ; print(yf.__version__)`
and comparing against [PIP](https://pypi.org/project/yfinance/#history).
Confirm latest version by running: `import yfinance as yf ; print(yf.__version__)` and comparing against [PyPI](https://pypi.org/project/yfinance/#history).
### Does Yahoo actually have the data?
@@ -34,6 +24,10 @@ body:
Yahoo Finance free service has rate-limiting https://github.com/ranaroussi/yfinance/discussions/1513. Once limit hit, Yahoo can delay, block, or return bad data -> not a `yfinance` bug.
### Does issue already exist?
Use the search tool. Don't duplicate existing issues.
- type: markdown
attributes:
value: |
@@ -61,7 +55,7 @@ body:
id: debug-log
attributes:
label: "Debug log"
description: "Run code with debug logging enabled and post the full output. Instructions: https://github.com/ranaroussi/yfinance/tree/main#logging"
description: "Run code with debug logging enabled and post the full output. IMPORTANT INSTRUCTIONS: https://github.com/ranaroussi/yfinance/tree/main#logging"
validations:
required: true

View File

@@ -1,6 +1,13 @@
Change Log
===========
0.2.37
------
Small fixes:
- Fix Pandas warnings #1838 #1844
- Fix price repair bug, typos, refactor #1866 #1865 #1849
- Stop disabling logging #1841
0.2.36
------
Small fixes:

View File

@@ -1,5 +1,5 @@
{% set name = "yfinance" %}
{% set version = "0.2.36" %}
{% set version = "0.2.37" %}
package:
name: "{{ name|lower }}"

View File

@@ -277,7 +277,7 @@ class TestPriceHistory(unittest.TestCase):
# Reproduce issue #1634 - 1d dividend out-of-range, should be prepended to prices
div_dt = _pd.Timestamp(2022, 7, 21).tz_localize("America/New_York")
df_dividends = _pd.DataFrame(data={"Dividends":[1.0]}, index=[div_dt])
df_prices = _pd.DataFrame(data={c:[1.0] for c in yf.const.price_colnames}|{'Volume':0}, index=[div_dt+_dt.timedelta(days=1)])
df_prices = _pd.DataFrame(data={c:[1.0] for c in yf.const._PRICE_COLNAMES_}|{'Volume':0}, index=[div_dt+_dt.timedelta(days=1)])
df_merged = yf.utils.safe_merge_dfs(df_prices, df_dividends, '1d')
self.assertEqual(df_merged.shape[0], 2)
self.assertTrue(df_merged[df_prices.columns].iloc[1:].equals(df_prices))
@@ -470,6 +470,18 @@ class TestPriceRepair(unittest.TestCase):
if cls.session is not None:
cls.session.close()
def test_types(self):
tkr = 'INTC'
dat = yf.Ticker(tkr, session=self.session)
data = dat.history(period="3mo", interval="1d", prepost=True, repair=True)
self.assertIsInstance(data, _pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
reconstructed = dat._lazy_load_price_history()._reconstruct_intervals_batch(data, "1wk", True)
self.assertIsInstance(reconstructed, _pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
def test_reconstruct_2m(self):
# 2m repair requires 1m data.
# Yahoo restricts 1m fetches to 7 days max within last 30 days.
@@ -494,6 +506,7 @@ class TestPriceRepair(unittest.TestCase):
tkr = "PNL.L"
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.fast_info["timezone"]
hist = dat._lazy_load_price_history()
data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
df = _pd.DataFrame(data={"Open": [470.5, 473.5, 474.5, 470],
@@ -517,7 +530,7 @@ class TestPriceRepair(unittest.TestCase):
# Run test
df_repaired = dat._fix_unit_random_mixups(df_bad, "1wk", tz_exchange, prepost=False)
df_repaired = hist._fix_unit_random_mixups(df_bad, "1wk", tz_exchange, prepost=False)
# First test - no errors left
for c in data_cols:
@@ -548,6 +561,7 @@ class TestPriceRepair(unittest.TestCase):
tkr = "PNL.L"
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.fast_info["timezone"]
hist = dat._lazy_load_price_history()
data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
df = _pd.DataFrame(data={"Open": [400, 398, 392.5, 417],
@@ -574,7 +588,7 @@ class TestPriceRepair(unittest.TestCase):
df.index = df.index.tz_localize(tz_exchange)
df_bad.index = df_bad.index.tz_localize(tz_exchange)
df_repaired = dat._fix_unit_random_mixups(df_bad, "1wk", tz_exchange, prepost=False)
df_repaired = hist._fix_unit_random_mixups(df_bad, "1wk", tz_exchange, prepost=False)
# First test - no errors left
for c in data_cols:
@@ -606,6 +620,7 @@ class TestPriceRepair(unittest.TestCase):
tkr = "PNL.L"
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.fast_info["timezone"]
hist = dat._lazy_load_price_history()
data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
df = _pd.DataFrame(data={"Open": [478, 476, 476, 472],
@@ -627,7 +642,7 @@ class TestPriceRepair(unittest.TestCase):
df.index = df.index.tz_localize(tz_exchange)
df_bad.index = df_bad.index.tz_localize(tz_exchange)
df_repaired = dat._fix_unit_random_mixups(df_bad, "1d", tz_exchange, prepost=False)
df_repaired = hist._fix_unit_random_mixups(df_bad, "1d", tz_exchange, prepost=False)
# First test - no errors left
for c in data_cols:
@@ -656,6 +671,7 @@ class TestPriceRepair(unittest.TestCase):
for interval in ['1d', '1wk']:
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.fast_info["timezone"]
hist = dat._lazy_load_price_history()
data_cols = ["Low", "High", "Open", "Close", "Adj Close"]
_dp = os.path.dirname(__file__)
@@ -672,7 +688,7 @@ class TestPriceRepair(unittest.TestCase):
df.index = _pd.to_datetime(df.index, utc=True).tz_convert(tz_exchange)
df = df.sort_index()
df_repaired = dat._fix_unit_switch(df_bad, interval, tz_exchange)
df_repaired = hist._fix_unit_switch(df_bad, interval, tz_exchange)
df_repaired = df_repaired.sort_index()
# First test - no errors left
@@ -704,6 +720,7 @@ class TestPriceRepair(unittest.TestCase):
def test_repair_zeroes_daily(self):
tkr = "BBIL.L"
dat = yf.Ticker(tkr, session=self.session)
hist = dat._lazy_load_price_history()
tz_exchange = dat.fast_info["timezone"]
df_bad = _pd.DataFrame(data={"Open": [0, 102.04, 102.04],
@@ -719,7 +736,7 @@ class TestPriceRepair(unittest.TestCase):
df_bad.index.name = "Date"
df_bad.index = df_bad.index.tz_localize(tz_exchange)
repaired_df = dat._fix_zeroes(df_bad, "1d", tz_exchange, prepost=False)
repaired_df = hist._fix_zeroes(df_bad, "1d", tz_exchange, prepost=False)
correct_df = df_bad.copy()
correct_df.loc["2022-11-01", "Open"] = 102.080002
@@ -753,6 +770,7 @@ class TestPriceRepair(unittest.TestCase):
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.fast_info["timezone"]
df.index = df.index.tz_localize(tz_exchange)
hist = dat._lazy_load_price_history()
rtol = 5e-3
for i in [0, 1, 2]:
@@ -761,7 +779,7 @@ class TestPriceRepair(unittest.TestCase):
df_slice_bad = df_slice.copy()
df_slice_bad.loc[df_slice_bad.index[j], "Adj Close"] = 0.0
df_slice_bad_repaired = dat._fix_zeroes(df_slice_bad, "1d", tz_exchange, prepost=False)
df_slice_bad_repaired = hist._fix_zeroes(df_slice_bad, "1d", tz_exchange, prepost=False)
for c in ["Close", "Adj Close"]:
self.assertTrue(_np.isclose(df_slice_bad_repaired[c], df_slice[c], rtol=rtol).all())
self.assertTrue("Repaired?" in df_slice_bad_repaired.columns)
@@ -771,8 +789,9 @@ class TestPriceRepair(unittest.TestCase):
tkr = "INTC"
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.fast_info["timezone"]
hist = dat._lazy_load_price_history()
correct_df = dat.history(period="1wk", interval="1h", auto_adjust=False, repair=True)
correct_df = hist.history(period="1wk", interval="1h", auto_adjust=False, repair=True)
df_bad = correct_df.copy()
bad_idx = correct_df.index[10]
@@ -783,7 +802,7 @@ class TestPriceRepair(unittest.TestCase):
df_bad.loc[bad_idx, "Adj Close"] = _np.nan
df_bad.loc[bad_idx, "Volume"] = 0
repaired_df = dat._fix_zeroes(df_bad, "1h", tz_exchange, prepost=False)
repaired_df = hist._fix_zeroes(df_bad, "1h", tz_exchange, prepost=False)
for c in ["Open", "Low", "High", "Close"]:
try:
@@ -812,11 +831,12 @@ class TestPriceRepair(unittest.TestCase):
for interval in intervals:
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.fast_info["timezone"]
hist = dat._lazy_load_price_history()
_dp = os.path.dirname(__file__)
df_good = dat.history(start='2020-01-01', end=_dt.date.today(), interval=interval, auto_adjust=False)
repaired_df = dat._fix_bad_stock_split(df_good, interval, tz_exchange)
repaired_df = hist._fix_bad_stock_split(df_good, interval, tz_exchange)
# Expect no change from repair
df_good = df_good.sort_index()
@@ -836,6 +856,7 @@ class TestPriceRepair(unittest.TestCase):
for tkr in bad_tkrs:
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.fast_info["timezone"]
hist = dat._lazy_load_price_history()
_dp = os.path.dirname(__file__)
interval = '1d'
@@ -846,7 +867,7 @@ class TestPriceRepair(unittest.TestCase):
df_bad = _pd.read_csv(fp, index_col="Date")
df_bad.index = _pd.to_datetime(df_bad.index, utc=True)
repaired_df = dat._fix_bad_stock_split(df_bad, "1d", tz_exchange)
repaired_df = hist._fix_bad_stock_split(df_bad, "1d", tz_exchange)
fp = os.path.join(_dp, "data", tkr.replace('.','-')+'-'+interval+"-bad-stock-split-fixed.csv")
correct_df = _pd.read_csv(fp, index_col="Date")
@@ -876,11 +897,12 @@ class TestPriceRepair(unittest.TestCase):
for interval in intervals:
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.fast_info["timezone"]
hist = dat._lazy_load_price_history()
_dp = os.path.dirname(__file__)
df_good = dat.history(start='2020-11-30', end='2021-04-01', interval=interval, auto_adjust=False)
df_good = hist.history(start='2020-11-30', end='2021-04-01', interval=interval, auto_adjust=False)
repaired_df = dat._fix_bad_stock_split(df_good, interval, tz_exchange)
repaired_df = hist._fix_bad_stock_split(df_good, interval, tz_exchange)
# Expect no change from repair
df_good = df_good.sort_index()
@@ -900,12 +922,13 @@ class TestPriceRepair(unittest.TestCase):
dat = yf.Ticker(tkr, session=self.session)
tz_exchange = dat.fast_info["timezone"]
hist = dat._lazy_load_price_history()
_dp = os.path.dirname(__file__)
df_bad = _pd.read_csv(os.path.join(_dp, "data", tkr.replace('.','-')+"-1d-missing-div-adjust.csv"), index_col="Date")
df_bad.index = _pd.to_datetime(df_bad.index)
repaired_df = dat._fix_missing_div_adjust(df_bad, "1d", tz_exchange)
repaired_df = hist._fix_missing_div_adjust(df_bad, "1d", tz_exchange)
correct_df = _pd.read_csv(os.path.join(_dp, "data", tkr.replace('.','-')+"-1d-missing-div-adjust-fixed.csv"), index_col="Date")
correct_df.index = _pd.to_datetime(correct_df.index)

View File

@@ -247,15 +247,6 @@ class TestTickerHistory(unittest.TestCase):
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
def test_reconstruct_intervals_batch(self):
data = self.ticker.history(period="3mo", interval="1d", prepost=True, repair=True)
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
reconstructed = self.ticker._reconstruct_intervals_batch(data, "1wk", True)
self.assertIsInstance(reconstructed, pd.DataFrame, "data has wrong type")
self.assertFalse(data.empty, "data is empty")
class TestTickerEarnings(unittest.TestCase):
session = None

View File

@@ -8,6 +8,8 @@ Specific test class:
python -m unittest tests.utils.TestTicker
"""
from unittest import TestSuite
# import pandas as pd
# import numpy as np
@@ -34,16 +36,16 @@ class TestCache(unittest.TestCase):
tkr = 'AMZN'
tz1 = "America/New_York"
tz2 = "London/Europe"
cache = yf.utils.get_tz_cache()
cache = yf.cache.get_tz_cache()
cache.store(tkr, tz1)
cache.store(tkr, tz2)
def test_setTzCacheLocation(self):
self.assertEqual(yf.utils._DBManager.get_location(), self.tempCacheDir.name)
self.assertEqual(yf.cache._TzDBManager.get_location(), self.tempCacheDir.name)
tkr = 'AMZN'
tz1 = "America/New_York"
cache = yf.utils.get_tz_cache()
cache = yf.cache.get_tz_cache()
cache.store(tkr, tz1)
self.assertTrue(os.path.exists(os.path.join(self.tempCacheDir.name, "tkr-tz.db")))
@@ -60,10 +62,10 @@ class TestCacheNoPermission(unittest.TestCase):
tz1 = "America/New_York"
# During attempt to store, will discover cannot write
yf.utils.get_tz_cache().store(tkr, tz1)
yf.cache.get_tz_cache().store(tkr, tz1)
# Handling the store failure replaces cache with a dummy
cache = yf.utils.get_tz_cache()
cache = yf.cache.get_tz_cache()
self.assertTrue(cache.dummy)
cache.store(tkr, tz1)
@@ -71,19 +73,19 @@ class TestCacheNoPermission(unittest.TestCase):
# Test that if cache path in read-only filesystem, no exception.
tkr = 'AMZN'
# During attempt to lookup, will discover cannot write
yf.utils.get_tz_cache().lookup(tkr)
yf.cache.get_tz_cache().lookup(tkr)
# Handling the lookup failure replaces cache with a dummy
cache = yf.utils.get_tz_cache()
cache = yf.cache.get_tz_cache()
self.assertTrue(cache.dummy)
cache.lookup(tkr)
def suite():
suite = unittest.TestSuite()
suite.addTest(TestCache('Test cache'))
suite.addTest(TestCacheNoPermission('Test cache no permission'))
return suite
ts: TestSuite = unittest.TestSuite()
ts.addTest(TestCache('Test cache'))
ts.addTest(TestCacheNoPermission('Test cache no permission'))
return ts
if __name__ == '__main__':

File diff suppressed because it is too large Load Diff

View File

@@ -115,7 +115,7 @@ fundamentals_keys = {
"PaymentstoSuppliersforGoodsandServices", "ClassesofCashReceiptsfromOperatingActivities",
"OtherCashReceiptsfromOperatingActivities", "ReceiptsfromGovernmentGrants", "ReceiptsfromCustomers"]}
price_colnames = ['Open', 'High', 'Low', 'Close', 'Adj Close']
_PRICE_COLNAMES_ = ['Open', 'High', 'Low', 'Close', 'Adj Close']
quote_summary_valid_modules = (
"summaryProfile", # contains general information about the company

1630
yfinance/scrapers/history.py Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,5 @@
import datetime
import json
import logging
import warnings
from collections.abc import MutableMapping
@@ -182,10 +181,7 @@ class FastInfo:
def _get_1y_prices(self, fullDaysOnly=False):
if self._prices_1y is None:
# Temporarily disable error printing
logging.disable(logging.CRITICAL)
self._prices_1y = self._tkr.history(period="380d", auto_adjust=False, keepna=True, proxy=self.proxy)
logging.disable(logging.NOTSET)
self._md = self._tkr.get_history_metadata(proxy=self.proxy)
try:
ctp = self._md["currentTradingPeriod"]
@@ -211,18 +207,12 @@ class FastInfo:
def _get_1wk_1h_prepost_prices(self):
if self._prices_1wk_1h_prepost is None:
# Temporarily disable error printing
logging.disable(logging.CRITICAL)
self._prices_1wk_1h_prepost = self._tkr.history(period="1wk", interval="1h", auto_adjust=False, prepost=True, proxy=self.proxy)
logging.disable(logging.NOTSET)
return self._prices_1wk_1h_prepost
def _get_1wk_1h_reg_prices(self):
if self._prices_1wk_1h_reg is None:
# Temporarily disable error printing
logging.disable(logging.CRITICAL)
self._prices_1wk_1h_reg = self._tkr.history(period="1wk", interval="1h", auto_adjust=False, prepost=False, proxy=self.proxy)
logging.disable(logging.NOTSET)
return self._prices_1wk_1h_reg
def _get_exchange_metadata(self):
@@ -261,8 +251,6 @@ class FastInfo:
if self._currency is not None:
return self._currency
if self._tkr._history_metadata is None:
self._get_1y_prices()
md = self._tkr.get_history_metadata(proxy=self.proxy)
self._currency = md["currency"]
return self._currency
@@ -272,8 +260,6 @@ class FastInfo:
if self._quote_type is not None:
return self._quote_type
if self._tkr._history_metadata is None:
self._get_1y_prices()
md = self._tkr.get_history_metadata(proxy=self.proxy)
self._quote_type = md["instrumentType"]
return self._quote_type

View File

@@ -27,6 +27,7 @@ from collections import namedtuple as _namedtuple
import pandas as _pd
from .base import TickerBase
from .const import _BASE_URL_
class Ticker(TickerBase):
@@ -40,9 +41,9 @@ class Ticker(TickerBase):
def _download_options(self, date=None):
if date is None:
url = f"{self._base_url}/v7/finance/options/{self.ticker}"
url = f"{_BASE_URL_}/v7/finance/options/{self.ticker}"
else:
url = f"{self._base_url}/v7/finance/options/{self.ticker}?date={date}"
url = f"{_BASE_URL_}/v7/finance/options/{self.ticker}?date={date}"
r = self._data.get(url=url, proxy=self.proxy).json()
if len(r.get('optionChain', {}).get('result', [])) > 0:

View File

@@ -689,7 +689,7 @@ def safe_merge_dfs(df_main, df_sub, interval):
df_main['Dividends'] = 0.0
return df_main
else:
empty_row_data = {**{c:[_np.nan] for c in const.price_colnames}, 'Volume':[0]}
empty_row_data = {**{c:[_np.nan] for c in const._PRICE_COLNAMES_}, 'Volume':[0]}
if interval == '1d':
# For 1d, add all out-of-range event dates
for i in _np.where(f_outOfRange)[0]:
@@ -772,7 +772,7 @@ def fix_Yahoo_dst_issue(df, interval):
f_pre_midnight = (df.index.minute == 0) & (df.index.hour.isin([22, 23]))
dst_error_hours = _np.array([0] * df.shape[0])
dst_error_hours[f_pre_midnight] = 24 - df.index[f_pre_midnight].hour
df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
df.index += _pd.to_timedelta(dst_error_hours, 'h')
return df

View File

@@ -1 +1 @@
version = "0.2.36"
version = "0.2.37"