Bump version to 0.2.29

Merge pull request #1692 from ranaroussi/dev
sync dev -> main
2023-09-22 12:00:47 +01:00 · 2023-09-22 12:00:06 +01:00 · 2023-09-22 11:27:38 +01:00 · 2023-09-22 11:24:30 +01:00 · 2023-09-22 11:21:17 +01:00 · 2023-09-19 19:35:03 +01:00
10 changed files with 257 additions and 211 deletions
--- a/.github/ISSUE_TEMPLATE/bug_report.yaml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yaml
@@ -42,6 +42,13 @@ body:

        Provide the following as best you can:

+  - type: textarea
+    id: summary
+    attributes:
+      label: "Describe bug"
+    validations:
+      required: true
+
  - type: textarea
    id: code
    attributes:
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,6 +1,14 @@
 Change Log
 ===========

+0.2.29
+------
+- Fix pandas warning when retrieving quotes. #1672
+- Replace sqlite3 with peewee for 100% thread-safety #1675
+- Fix merging events with intraday prices #1684
+- Fix error when calling enable_debug_mode twice #1687
+- Price repair fixes #1688
+
 0.2.28
 ------
 - Fix TypeError: 'FastInfo' object is not callable #1636
--- a/README.md
+++ b/README.md
@@ -258,6 +258,7 @@ To install `yfinance` using `conda`, see
 -   [frozendict](https://pypi.org/project/frozendict) \>= 2.3.4
 -   [beautifulsoup4](https://pypi.org/project/beautifulsoup4) \>= 4.11.1
 -   [html5lib](https://pypi.org/project/html5lib) \>= 1.1
+-   [peewee](https://pypi.org/project/peewee)  \>= 3.16.2

 #### Optional (if you want to use `pandas_datareader`)

--- a/meta.yaml
+++ b/meta.yaml
@@ -1,5 +1,5 @@
 {% set name = "yfinance" %}
-{% set version = "0.2.28" %}
+{% set version = "0.2.29" %}

 package:
  name: "{{ name|lower }}"
@@ -26,6 +26,7 @@ requirements:
    - frozendict >=2.3.4
    - beautifulsoup4 >=4.11.1
    - html5lib >=1.1
+    - peewee >=3.16.2
    # - pycryptodome >=3.6.6
    - pip
    - python
@@ -41,6 +42,7 @@ requirements:
    - frozendict >=2.3.4
    - beautifulsoup4 >=4.11.1
    - html5lib >=1.1
+    - peewee >=3.16.2
    # - pycryptodome >=3.6.6
    - python

--- a/requirements.txt
+++ b/requirements.txt
@@ -8,3 +8,4 @@ pytz>=2022.5
 frozendict>=2.3.4
 beautifulsoup4>=4.11.1
 html5lib>=1.1
+peewee>=3.16.2
--- a/setup.py
+++ b/setup.py
@@ -62,7 +62,7 @@ setup(
    install_requires=['pandas>=1.3.0', 'numpy>=1.16.5',
                      'requests>=2.31', 'multitasking>=0.0.7',
                      'lxml>=4.9.1', 'appdirs>=1.4.4', 'pytz>=2022.5',
-                      'frozendict>=2.3.4', 
+                      'frozendict>=2.3.4', 'peewee>=3.16.2',
                      'beautifulsoup4>=4.11.1', 'html5lib>=1.1'],
    # Note: Pandas.read_html() needs html5lib & beautifulsoup4
    entry_points={
--- a/tests/prices.py
+++ b/tests/prices.py
@@ -114,6 +114,43 @@ class TestPriceHistory(unittest.TestCase):
        if not test_run:
            self.skipTest("Skipping test_duplicatingWeekly() because not possible to fail Monday/weekend")

+    def test_pricesEventsMerge(self):
+        # Test case: dividend occurs after last row in price data
+        tkr = 'INTC'
+        start_d = _dt.date(2022, 1, 1)
+        end_d = _dt.date(2023, 1, 1)
+        df = yf.Ticker(tkr, session=self.session).history(interval='1d', start=start_d, end=end_d)
+        div = 1.0
+        future_div_dt = df.index[-1] + _dt.timedelta(days=1)
+        if future_div_dt.weekday() in [5, 6]:
+            future_div_dt += _dt.timedelta(days=1) * (7 - future_div_dt.weekday())
+        divs = _pd.DataFrame(data={"Dividends":[div]}, index=[future_div_dt])
+        df2 = yf.utils.safe_merge_dfs(df.drop(['Dividends', 'Stock Splits'], axis=1), divs, '1d')
+        self.assertIn(future_div_dt, df2.index)
+        self.assertIn("Dividends", df2.columns)
+        self.assertEqual(df2['Dividends'].iloc[-1], div)
+
+    def test_pricesEventsMerge_bug(self):
+        # Reproduce exception when merging intraday prices with future dividend
+        tkr = 'S32.AX'
+        interval = '30m'
+        df_index = []
+        d = 13
+        for h in range(0, 16):
+            for m in [0, 30]:
+                df_index.append(_dt.datetime(2023, 9, d, h, m))
+        df_index.append(_dt.datetime(2023, 9, d, 16))
+        df = _pd.DataFrame(index=df_index)
+        df.index = _pd.to_datetime(df.index)
+        df['Close'] = 1.0
+
+        div = 1.0
+        future_div_dt = _dt.datetime(2023, 9, 14, 10)
+        divs = _pd.DataFrame(data={"Dividends":[div]}, index=[future_div_dt])
+
+        df2 = yf.utils.safe_merge_dfs(df, divs, interval)
+        # No exception = test pass
+
    def test_intraDayWithEvents(self):
        tkrs = ["BHP.AX", "IMP.JO", "BP.L", "PNL.L", "INTC"]
        test_run = False
--- a/yfinance/base.py
+++ b/yfinance/base.py
@@ -829,6 +829,8 @@ class TickerBase:

    @utils.log_indent_decorator
    def _fix_unit_mixups(self, df, interval, tz_exchange, prepost):
+        if df.empty:
+            return df
        df2 = self._fix_unit_switch(df, interval, tz_exchange)
        df3 = self._fix_unit_random_mixups(df2, interval, tz_exchange, prepost)
        return df3
@@ -842,6 +844,9 @@ class TickerBase:
        # - a sudden switch between $<->cents at some date
        # This function fixes the first.

+        if df.empty:
+            return df
+
        # Easy to detect and fix, just look for outliers = ~100x local median
        logger = utils.get_yf_logger()

@@ -885,7 +890,11 @@ class TickerBase:
        ratio = df2_data / median
        ratio_rounded = (ratio / 20).round() * 20  # round ratio to nearest 20
        f = ratio_rounded == 100
-        if not f.any():
+        ratio_rcp = 1.0/ratio
+        ratio_rcp_rounded = (ratio_rcp / 20).round() * 20  # round ratio to nearest 20
+        f_rcp = (ratio_rounded == 100) | (ratio_rcp_rounded == 100)
+        f_either = f | f_rcp
+        if not f_either.any():
            logger.info("price-repair-100x: No sporadic 100x errors")
            if "Repaired?" not in df.columns:
                df["Repaired?"] = False
@@ -894,7 +903,7 @@ class TickerBase:
        # Mark values to send for repair
        tag = -1.0
        for i in range(len(data_cols)):
-            fi = f[:, i]
+            fi = f_either[:, i]
            c = data_cols[i]
            df2.loc[fi, c] = tag

@@ -906,35 +915,43 @@ class TickerBase:
        if n_after > 0:
            # This second pass will *crudely* "fix" any remaining errors in High/Low
            # simply by ensuring they don't contradict e.g. Low = 100x High.
-            f = df2_tagged
+            f = (df2[data_cols].to_numpy() == tag) & f
            for i in range(f.shape[0]):
                fi = f[i, :]
                if not fi.any():
                    continue
                idx = df2.index[i]

-                c = "Open"
-                j = data_cols.index(c)
-                if fi[j]:
-                    df2.loc[idx, c] = df.loc[idx, c] * 0.01
-                #
-                c = "Close"
-                j = data_cols.index(c)
-                if fi[j]:
-                    df2.loc[idx, c] = df.loc[idx, c] * 0.01
-                #
-                c = "Adj Close"
-                j = data_cols.index(c)
-                if fi[j]:
-                    df2.loc[idx, c] = df.loc[idx, c] * 0.01
-                #
-                c = "High"
-                j = data_cols.index(c)
+                for c in ['Open', 'Close']:
+                    j = data_cols.index(c)
+                    if fi[j]:
+                        df2.loc[idx, c] = df.loc[idx, c] * 0.01
+
+                c = "High" ; j = data_cols.index(c)
                if fi[j]:
                    df2.loc[idx, c] = df2.loc[idx, ["Open", "Close"]].max()
-                #
-                c = "Low"
-                j = data_cols.index(c)
+
+                c = "Low" ; j = data_cols.index(c)
+                if fi[j]:
+                    df2.loc[idx, c] = df2.loc[idx, ["Open", "Close"]].min()
+
+            f_rcp = (df2[data_cols].to_numpy() == tag) & f_rcp
+            for i in range(f_rcp.shape[0]):
+                fi = f_rcp[i, :]
+                if not fi.any():
+                    continue
+                idx = df2.index[i]
+
+                for c in ['Open', 'Close']:
+                    j = data_cols.index(c)
+                    if fi[j]:
+                        df2.loc[idx, c] = df.loc[idx, c] * 100.0
+
+                c = "High" ; j = data_cols.index(c)
+                if fi[j]:
+                    df2.loc[idx, c] = df2.loc[idx, ["Open", "Close"]].max()
+
+                c = "Low" ; j = data_cols.index(c)
                if fi[j]:
                    df2.loc[idx, c] = df2.loc[idx, ["Open", "Close"]].min()

@@ -953,9 +970,9 @@ class TickerBase:
            logger.info('price-repair-100x: ' + report_msg)

        # Restore original values where repair failed
-        f = df2_tagged
+        f_either = df2[data_cols].to_numpy() == tag
        for j in range(len(data_cols)):
-            fj = f[:, j]
+            fj = f_either[:, j]
            if fj.any():
                c = data_cols[j]
                df2.loc[fj, c] = df.loc[fj, c]
@@ -977,14 +994,6 @@ class TickerBase:
        # This function fixes the second.
        # Eventually Yahoo fixes but could take them 2 weeks.

-        # To detect, use 'bad split adjustment' algorithm. But only correct
-        # if no stock splits in data
-
-        f_splits = df['Stock Splits'].to_numpy() != 0.0
-        if f_splits.any():
-            utils.get_yf_logger().debug('price-repair-100x: Cannot check for chunked 100x errors because splits present')
-            return df
-
        return self._fix_prices_sudden_change(df, interval, tz_exchange, 100.0)

    @utils.log_indent_decorator
@@ -993,6 +1002,9 @@ class TickerBase:
        # But most times when prices=0 or NaN returned is because no trades.
        # Impossible to distinguish, so only attempt repair if few or rare.

+        if df.empty:
+            return df
+
        logger = utils.get_yf_logger()

        if df.shape[0] == 0:
@@ -1101,6 +1113,9 @@ class TickerBase:
        # Easy to detect and correct BUT ONLY IF the data 'df' includes today's dividend.
        # E.g. if fetching historic prices before todays dividend, then cannot fix.

+        if df.empty:
+            return df
+
        logger = utils.get_yf_logger()

        if df is None or df.empty:
@@ -1173,6 +1188,9 @@ class TickerBase:
        # which direction to reverse adjustment - have to analyse prices and detect. 
        # Not difficult.

+        if df.empty:
+            return df
+            
        logger = utils.get_yf_logger()

        interday = interval in ['1d', '1wk', '1mo', '3mo']
@@ -1198,6 +1216,9 @@ class TickerBase:

    @utils.log_indent_decorator
    def _fix_prices_sudden_change(self, df, interval, tz_exchange, change, correct_volume=False):
+        if df.empty:
+            return df
+            
        logger = utils.get_yf_logger()

        df = df.sort_index(ascending=False)
@@ -1262,11 +1283,25 @@ class TickerBase:
            # Avoid using 'Low' and 'High'. For multiday intervals, these can be 
            # very volatile so reduce ability to detect genuine stock split errors
            _1d_change_x = np.full((n, 2), 1.0)
-            price_data = df2[['Open','Close']].replace(0.0, 1.0).to_numpy()
+            price_data = df2[['Open','Close']].to_numpy()
+            f_zero = price_data == 0.0
        else:
            _1d_change_x = np.full((n, 4), 1.0)
-            price_data = df2[OHLC].replace(0.0, 1.0).to_numpy()
+            price_data = df2[OHLC].to_numpy()
+            f_zero = price_data == 0.0
+        if f_zero.any():
+            price_data[f_zero] = 1.0
+
+        # Update: if a VERY large dividend is paid out, then can be mistaken for a 1:2 stock split.
+        # Fix = use adjusted prices
+        adj = df2['Adj Close'].to_numpy() / df2['Close'].to_numpy()
+        for j in range(price_data.shape[1]):
+            price_data[:,j] *= adj
+
        _1d_change_x[1:] = price_data[1:, ] / price_data[:-1, ]
+        f_zero_num_denom = f_zero | np.roll(f_zero, 1, axis=0)
+        if f_zero_num_denom.any():
+            _1d_change_x[f_zero_num_denom] = 1.0
        if interday and interval != '1d':
            # average change
            _1d_change_minx = np.average(_1d_change_x, axis=1)
@@ -1365,6 +1400,29 @@ class TickerBase:
            logger.info(f'price-repair-split: No {fix_type}s detected')
            return df

+        # Update: if any 100x changes are soon after a stock split, so could be confused with split error, then abort
+        threshold_days = 30
+        f_splits = df['Stock Splits'].to_numpy() != 0.0
+        if change in [100.0, 0.01] and f_splits.any():
+            indices_A = np.where(f_splits)[0]
+            indices_B = np.where(f)[0]
+            if not len(indices_A) or not len(indices_B):
+                return None
+            gaps = indices_B[:, None] - indices_A
+            # Because data is sorted in DEscending order, need to flip gaps
+            gaps *= -1
+            f_pos = gaps > 0
+            if f_pos.any():
+                gap_min = gaps[f_pos].min()
+                gap_td = utils._interval_to_timedelta(interval) * gap_min
+                if isinstance(gap_td, _dateutil.relativedelta.relativedelta):
+                    threshold = _dateutil.relativedelta.relativedelta(days=threshold_days)
+                else:
+                    threshold = _datetime.timedelta(days=threshold_days)
+                if gap_td < threshold:
+                    logger.info(f'price-repair-split: 100x changes are too soon after stock split events, aborting')
+                    return df
+
        # if logger.isEnabledFor(logging.DEBUG):
        #     df_debug['i'] = list(range(0, df_debug.shape[0]))
        #     df_debug['i_rev'] = df_debug.shape[0]-1 - df_debug['i']
--- a/yfinance/utils.py
+++ b/yfinance/utils.py
@@ -27,7 +27,7 @@ import datetime as _datetime
 import logging
 import os as _os
 import re as _re
-import sqlite3 as _sqlite3
+import peewee as _peewee
 import sys as _sys
 import threading
 from functools import lru_cache
@@ -169,14 +169,15 @@ def setup_debug_formatting():
        yf_logger.warning("logging mode not set to 'DEBUG', so not setting up debug formatting")
        return

-    if yf_logger.handlers is None or len(yf_logger.handlers) == 0:
-        h = logging.StreamHandler()
-        # Ensure different level strings don't interfere with indentation
-        formatter = MultiLineFormatter(fmt='%(levelname)-8s %(message)s')
-        h.setFormatter(formatter)
-        yf_logger.addHandler(h)
-
    global yf_log_indented
+    if not yf_log_indented:
+        if yf_logger.handlers is None or len(yf_logger.handlers) == 0:
+            h = logging.StreamHandler()
+            # Ensure different level strings don't interfere with indentation
+            formatter = MultiLineFormatter(fmt='%(levelname)-8s %(message)s')
+            h.setFormatter(formatter)
+            yf_logger.addHandler(h)
+
    yf_log_indented = True


@@ -628,30 +629,32 @@ def fix_Yahoo_returning_live_separate(quotes, interval, tz_exchange):
                    # Yahoo is not returning live data (phew!)
                    return quotes
                if _np.isnan(quotes.loc[idx2, "Open"]):
-                    quotes.loc[idx2, "Open"] = quotes["Open"][n - 1]
+                    quotes.loc[idx2, "Open"] = quotes["Open"].iloc[n - 1]
                # Note: nanmax() & nanmin() ignores NaNs, but still need to check not all are NaN to avoid warnings
-                if not _np.isnan(quotes["High"][n - 1]):
-                    quotes.loc[idx2, "High"] = _np.nanmax([quotes["High"][n - 1], quotes["High"][n - 2]])
+                if not _np.isnan(quotes["High"].iloc[n - 1]):
+                    quotes.loc[idx2, "High"] = _np.nanmax([quotes["High"].iloc[n - 1], quotes["High"].iloc[n - 2]])
                    if "Adj High" in quotes.columns:
-                        quotes.loc[idx2, "Adj High"] = _np.nanmax([quotes["Adj High"][n - 1], quotes["Adj High"][n - 2]])
+                        quotes.loc[idx2, "Adj High"] = _np.nanmax([quotes["Adj High"].iloc[n - 1], quotes["Adj High"].iloc[n - 2]])

-                if not _np.isnan(quotes["Low"][n - 1]):
-                    quotes.loc[idx2, "Low"] = _np.nanmin([quotes["Low"][n - 1], quotes["Low"][n - 2]])
+                if not _np.isnan(quotes["Low"].iloc[n - 1]):
+                    quotes.loc[idx2, "Low"] = _np.nanmin([quotes["Low"].iloc[n - 1], quotes["Low"].iloc[n - 2]])
                    if "Adj Low" in quotes.columns:
-                        quotes.loc[idx2, "Adj Low"] = _np.nanmin([quotes["Adj Low"][n - 1], quotes["Adj Low"][n - 2]])
+                        quotes.loc[idx2, "Adj Low"] = _np.nanmin([quotes["Adj Low"].iloc[n - 1], quotes["Adj Low"].iloc[n - 2]])

-                quotes.loc[idx2, "Close"] = quotes["Close"][n - 1]
+                quotes.loc[idx2, "Close"] = quotes["Close"].iloc[n - 1]
                if "Adj Close" in quotes.columns:
-                    quotes.loc[idx2, "Adj Close"] = quotes["Adj Close"][n - 1]
-                quotes.loc[idx2, "Volume"] += quotes["Volume"][n - 1]
+                    quotes.loc[idx2, "Adj Close"] = quotes["Adj Close"].iloc[n - 1]
+                quotes.loc[idx2, "Volume"] += quotes["Volume"].iloc[n - 1]
                quotes = quotes.drop(quotes.index[n - 1])

    return quotes


 def safe_merge_dfs(df_main, df_sub, interval):
-    if df_sub.shape[0] == 0:
+    if df_sub.empty:
        raise Exception("No data to merge")
+    if df_main.empty:
+        return df_main

    df_sub_backup = df_sub.copy()
    data_cols = [c for c in df_sub.columns if c not in df_main]
@@ -675,47 +678,54 @@ def safe_merge_dfs(df_main, df_sub, interval):
    else:
        indices = _np.searchsorted(_np.append(df_main.index, df_main.index[-1] + td), df_sub.index, side='right')
        indices -= 1  # Convert from [[i-1], [i]) to [[i], [i+1])
-        # Numpy.searchsorted does not handle out-of-range well, so handle manually:
-        for i in range(len(df_sub.index)):
-            dt = df_sub.index[i]
-            if dt < df_main.index[0] or dt >= df_main.index[-1] + td:
-                # Out-of-range
-                indices[i] = -1
+    # Numpy.searchsorted does not handle out-of-range well, so handle manually:
+    for i in range(len(df_sub.index)):
+        dt = df_sub.index[i]
+        if dt < df_main.index[0] or dt >= df_main.index[-1] + td:
+            # Out-of-range
+            indices[i] = -1

    f_outOfRange = indices == -1
-    if f_outOfRange.any() and not intraday:
-        empty_row_data = {c:[_np.nan] for c in const.price_colnames}|{'Volume':[0]}
-        if interval == '1d':
-            # For 1d, add all out-of-range event dates
-            for i in _np.where(f_outOfRange)[0]:
-                dt = df_sub.index[i]
-                get_yf_logger().debug(f"Adding out-of-range {data_col} @ {dt.date()} in new prices row of NaNs")
-                empty_row = _pd.DataFrame(data=empty_row_data, index=[dt])
-                df_main = _pd.concat([df_main, empty_row], sort=True)
+    if f_outOfRange.any():
+        if intraday:
+            # Discard out-of-range dividends in intraday data, assume user not interested
+            df_sub = df_sub[~f_outOfRange]
+            if df_sub.empty:
+                df_main['Dividends'] = 0.0
+                return df_main
        else:
-            # Else, only add out-of-range event dates if occurring in interval 
-            # immediately after last pricfe row
-            last_dt = df_main.index[-1]
-            next_interval_start_dt = last_dt + td
-            next_interval_end_dt = next_interval_start_dt + td
-            for i in _np.where(f_outOfRange)[0]:
-                dt = df_sub.index[i]
-                if next_interval_start_dt <= dt < next_interval_end_dt:
-                    new_dt = next_interval_start_dt
+            empty_row_data = {c:[_np.nan] for c in const.price_colnames}|{'Volume':[0]}
+            if interval == '1d':
+                # For 1d, add all out-of-range event dates
+                for i in _np.where(f_outOfRange)[0]:
+                    dt = df_sub.index[i]
                    get_yf_logger().debug(f"Adding out-of-range {data_col} @ {dt.date()} in new prices row of NaNs")
                    empty_row = _pd.DataFrame(data=empty_row_data, index=[dt])
                    df_main = _pd.concat([df_main, empty_row], sort=True)
-        df_main = df_main.sort_index()
+            else:
+                # Else, only add out-of-range event dates if occurring in interval 
+                # immediately after last price row
+                last_dt = df_main.index[-1]
+                next_interval_start_dt = last_dt + td
+                next_interval_end_dt = next_interval_start_dt + td
+                for i in _np.where(f_outOfRange)[0]:
+                    dt = df_sub.index[i]
+                    if next_interval_start_dt <= dt < next_interval_end_dt:
+                        new_dt = next_interval_start_dt
+                        get_yf_logger().debug(f"Adding out-of-range {data_col} @ {dt.date()} in new prices row of NaNs")
+                        empty_row = _pd.DataFrame(data=empty_row_data, index=[dt])
+                        df_main = _pd.concat([df_main, empty_row], sort=True)
+            df_main = df_main.sort_index()

-        # Re-calculate indices
-        indices = _np.searchsorted(_np.append(df_main.index, df_main.index[-1] + td), df_sub.index, side='right')
-        indices -= 1  # Convert from [[i-1], [i]) to [[i], [i+1])
-        # Numpy.searchsorted does not handle out-of-range well, so handle manually:
-        for i in range(len(df_sub.index)):
-            dt = df_sub.index[i]
-            if dt < df_main.index[0] or dt >= df_main.index[-1] + td:
-                # Out-of-range
-                indices[i] = -1
+            # Re-calculate indices
+            indices = _np.searchsorted(_np.append(df_main.index, df_main.index[-1] + td), df_sub.index, side='right')
+            indices -= 1  # Convert from [[i-1], [i]) to [[i], [i+1])
+            # Numpy.searchsorted does not handle out-of-range well, so handle manually:
+            for i in range(len(df_sub.index)):
+                dt = df_sub.index[i]
+                if dt < df_main.index[0] or dt >= df_main.index[-1] + td:
+                    # Out-of-range
+                    indices[i] = -1

    f_outOfRange = indices == -1
    if f_outOfRange.any():
@@ -891,136 +901,60 @@ class ProgressBar:
 # TimeZone cache related code
 # ---------------------------------

-class _KVStore:
-    """Simple Sqlite backed key/value store, key and value are strings. Should be thread safe."""

-    def __init__(self, filename):
-        self._cache_mutex = Lock()
-        with self._cache_mutex:
-            self.conn = _sqlite3.connect(filename, timeout=10, check_same_thread=False)
-            self.conn.execute('pragma journal_mode=wal')
-            try:
-                self.conn.execute('create table if not exists "kv" (key TEXT primary key, value TEXT) without rowid')
-            except Exception as e:
-                if 'near "without": syntax error' in str(e):
-                    # "without rowid" requires sqlite 3.8.2. Older versions will raise exception
-                    self.conn.execute('create table if not exists "kv" (key TEXT primary key, value TEXT)')
-                else:
-                    raise
-            self.conn.commit()
-        _atexit.register(self.close)
-
-    def close(self):
-        if self.conn is not None:
-            with self._cache_mutex:
-                self.conn.close()
-                self.conn = None
-
-    def get(self, key: str) -> Union[str, None]:
-        """Get value for key if it exists else returns None"""
-        try:
-            item = self.conn.execute('select value from "kv" where key=?', (key,))
-        except _sqlite3.IntegrityError as e:
-            self.delete(key)
-            return None
-        if item:
-            return next(item, (None,))[0]
-
-    def set(self, key: str, value: str) -> None:
-        if value is None:
-            self.delete(key)
-        else:
-            with self._cache_mutex:
-                self.conn.execute('replace into "kv" (key, value) values (?,?)', (key, value))
-                self.conn.commit()
-
-    def bulk_set(self, kvdata: Dict[str, str]):
-        records = tuple(i for i in kvdata.items())
-        with self._cache_mutex:
-            self.conn.executemany('replace into "kv" (key, value) values (?,?)', records)
-            self.conn.commit()
-
-    def delete(self, key: str):
-        with self._cache_mutex:
-            self.conn.execute('delete from "kv" where key=?', (key,))
-            self.conn.commit()
+_cache_dir = _os.path.join(_ad.user_cache_dir(), "py-yfinance")
+DB_PATH = _os.path.join(_cache_dir, 'tkr-tz.db')
+db = _peewee.SqliteDatabase(DB_PATH, pragmas={'journal_mode': 'wal', 'cache_size': -64})
+_tz_cache = None


 class _TzCacheException(Exception):
    pass


+class KV(_peewee.Model):
+    key = _peewee.CharField(primary_key=True)
+    value = _peewee.CharField(null=True)
+    
+    class Meta:
+        database = db
+        without_rowid = True
+
+
 class _TzCache:
-    """Simple sqlite file cache of ticker->timezone"""
-
    def __init__(self):
-        self._setup_cache_folder()
-        # Must init db here, where is thread-safe
+        db.connect()
+        db.create_tables([KV])
+
+        old_cache_file_path = _os.path.join(_cache_dir, "tkr-tz.csv")
+        if _os.path.isfile(old_cache_file_path):
+            _os.remove(old_cache_file_path)
+
+    def lookup(self, key):
        try:
-            self._tz_db = _KVStore(_os.path.join(self._db_dir, "tkr-tz.db"))
-        except _sqlite3.DatabaseError as err:
-            raise _TzCacheException(f"Error creating TzCache folder: '{self._db_dir}' reason: {err}")
-        self._migrate_cache_tkr_tz()
-
-    def _setup_cache_folder(self):
-        if not _os.path.isdir(self._db_dir):
-            try:
-                _os.makedirs(self._db_dir)
-            except OSError as err:
-                raise _TzCacheException(f"Error creating TzCache folder: '{self._db_dir}' reason: {err}")
-
-        elif not (_os.access(self._db_dir, _os.R_OK) and _os.access(self._db_dir, _os.W_OK)):
-            raise _TzCacheException(f"Cannot read and write in TzCache folder: '{self._db_dir}'")
-
-    def lookup(self, tkr):
-        return self.tz_db.get(tkr)
-
-    def store(self, tkr, tz):
-        if tz is None:
-            self.tz_db.delete(tkr)
-        else:
-            tz_db = self.tz_db.get(tkr)
-            if tz_db is not None:
-                if tz != tz_db:
-                    get_yf_logger().debug(f'{tkr}: Overwriting cached TZ "{tz_db}" with different TZ "{tz}"')
-                    self.tz_db.set(tkr, tz)
-            else:
-                self.tz_db.set(tkr, tz)
-
-    @property
-    def _db_dir(self):
-        global _cache_dir
-        return _os.path.join(_cache_dir, "py-yfinance")
-
-    @property
-    def tz_db(self):
-        return self._tz_db
-
-    def _migrate_cache_tkr_tz(self):
-        """Migrate contents from old ticker CSV-cache to SQLite db"""
-        old_cache_file_path = _os.path.join(self._db_dir, "tkr-tz.csv")
-
-        if not _os.path.isfile(old_cache_file_path):
+            return KV.get(KV.key == key).value
+        except KV.DoesNotExist:
            return None
-        try:
-            df = _pd.read_csv(old_cache_file_path, index_col="Ticker", on_bad_lines="skip")
-        except _pd.errors.EmptyDataError:
-            _os.remove(old_cache_file_path)
-        except TypeError:
-            _os.remove(old_cache_file_path)
-        else:
-            # Discard corrupt data:
-            df = df[~df["Tz"].isna().to_numpy()]
-            df = df[~(df["Tz"] == '').to_numpy()]
-            df = df[~df.index.isna()]
-            if not df.empty:
-                try:
-                    self.tz_db.bulk_set(df.to_dict()['Tz'])
-                except Exception as e:
-                    # Ignore
-                    pass

-            _os.remove(old_cache_file_path)
+    def store(self, key, value):
+        try:
+            if value is None:
+                q = KV.delete().where(KV.key == key)
+                q.execute()
+                return
+            with db.atomic():
+                KV.insert(key=key, value=value).execute()
+        except IntegrityError:
+            # Integrity error means the key already exists. Try updating the key.
+            old_value = self.lookup(key)
+            if old_value != value:
+                get_yf_logger().debug(f"Value for key {key} changed from {old_value} to {value}.")
+                with db.atomic():
+                    q = KV.update(value=value).where(KV.key == key)
+                    q.execute()
+
+    def close(self):
+        db.close()


 class _TzCacheDummy:
@@ -1058,9 +992,7 @@ def get_tz_cache():
        return _tz_cache


-_cache_dir = _ad.user_cache_dir()
 _cache_init_lock = Lock()
-_tz_cache = None


 def set_tz_cache_location(cache_dir: str):
--- a/yfinance/version.py
+++ b/yfinance/version.py
@@ -1 +1 @@
-version = "0.2.28"
+version = "0.2.29"
Author	SHA1	Message	Date
Value Raider	88525abcbd	Bump version to 0.2.29	2023-09-22 12:00:47 +01:00
ValueRaider	99ef055cc4	Merge pull request #1692 from ranaroussi/dev sync dev -> main	2023-09-22 12:00:06 +01:00
ValueRaider	0f36f7980b	Merge pull request #1688 from ranaroussi/fix/price-repair Price repair fixes	2023-09-22 11:27:38 +01:00
ValueRaider	8282af9ce4	Merge pull request #1684 from ranaroussi/fix/prices-intraday-merge-events Fix merging events with intraday prices	2023-09-22 11:24:30 +01:00
Value Raider	5208c8cf05	Price repair improvements Price repair improvements: - don't attempt repair of empty prices table - random-mixups: fix 0.01x errors, not just 100x - stop zeroes, big-dividends, and 100x errors triggering false split errors	2023-09-22 11:21:17 +01:00
Value Raider	d3dfb4c6a8	Fix merging events with intraday prices If Yahoo returns intraday price data with dividend or stock-split event in future, then this broke the merge. Fix is to discard out-of-range events. Assumes that if user requesting intraday then they aren't interested in events.	2023-09-19 19:35:03 +01:00
ValueRaider	279726afe4	Merge pull request #1687 from arduinocc04/arduinocc04-patch-2 Fix error when calling enable_debug_mode twice	2023-09-19 17:41:28 +01:00
조다니엘(Daniel Cho)	937386f3ef	Fix error when calling enable_debug_mode twice	2023-09-19 10:02:28 +09:00
ValueRaider	32e569f652	Merge pull request #1675 from ranaroussi/hotfix/database-error Replace sqlite3 with peewee for 100% thread-safety	2023-09-17 21:47:57 +01:00
ValueRaider	de59f0b2c6	Bug template - add section to describe bug	2023-09-09 18:32:32 +01:00
Value Raider	7d6d8562e8	Replace sqlite3 with peewee for 100% thread-safety	2023-09-03 16:47:36 +01:00
ValueRaider	6cae6d45b1	Merge pull request #1672 from difelice/fix-fix_yahoo_returning_live_separate-warnings Fix pandas warning when retrieving quotes.	2023-09-01 22:07:18 +01:00
Alessandro Di Felice	ec3de0710d	Fix pandas warning when retrieving quotes	2023-09-01 14:07:53 -05:00