
Commit 4b3547e
Merge remote-tracking branch 'origin/master' into arkershaw-master
2 parents: a7c716a + ce74a34

12 files changed: +112 -47 lines

.github/ISSUE_TEMPLATE/1-bug.yml (+8 -3)

@@ -1,6 +1,5 @@
 name: Bug report
 description: File a new bug report. Please use the search
-title: "Short title loaded with keywords"
 body:
   - type: markdown
     attributes:
@@ -80,7 +79,13 @@ body:
       description: >
         Versions of the relevant software / packages.
       value: |
-        <!-- From `backtesting.__version__`. If git, use commit hash -->
-        - Backtesting version: 0.?.?
+        <!--
+        # Please paste the output of:
+        for pkg in ('backtesting', 'pandas', 'numpy', 'bokeh'):
+            print('-', pkg, getattr(__import__(pkg), '__version__', 'git'))
+        -->
+        - `backtesting.__version__`:
+        - `pandas.__version__`:
+        - `numpy.__version__`:
         - `bokeh.__version__`:
         - OS:

.github/ISSUE_TEMPLATE/2-enh.yml (+0 -1)

@@ -1,6 +1,5 @@
 name: Enhancement proposal
 description: Describe the enhancement you'd like to see
-title: "Short title loaded with keywords"
 body:
   - type: markdown
     attributes:

.github/workflows/ci.yml (+2)

@@ -4,6 +4,8 @@ on:
   pull_request: { branches: [master] }
   schedule: [ cron: '2 2 * * 6' ]  # Every Saturday, 02:02

+env:
+  TQDM_MININTERVAL: 10
 jobs:
   lint:
     runs-on: ubuntu-latest
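The new top-level `env:` block (mirrored in deploy-docs.yml below) appears intended to quiet tqdm progress output in CI logs: recent tqdm releases (4.66+) read `TQDM_*`-prefixed environment variables as overrides for the matching keyword defaults. A quick local check, as a sketch only (assumes tqdm >= 4.66; not part of this commit):

import os
import time

os.environ.setdefault('TQDM_MININTERVAL', '10')  # mirror what the CI env block above sets
from tqdm import tqdm  # noqa: E402  -- imported after the env var is set

for _ in tqdm(range(300)):
    time.sleep(0.01)  # ~3 s total, so with a 10 s mininterval the bar barely redraws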

.github/workflows/deploy-docs.yml (+2)

@@ -7,6 +7,8 @@ jobs:
   deploy:
     name: Deploy
     runs-on: ubuntu-latest
+    env:
+      TQDM_MININTERVAL: 10

     steps:
       - uses: actions/setup-python@v5

backtesting/__init__.py (+20)

@@ -68,3 +68,23 @@
 from . import lib  # noqa: F401
 from ._plotting import set_bokeh_output  # noqa: F401
 from .backtesting import Backtest, Strategy  # noqa: F401
+
+
+# Add overridable backtesting.Pool used for parallel optimization
+def Pool(processes=None, initializer=None, initargs=()):
+    import multiprocessing as mp
+    if mp.get_start_method() == 'spawn':
+        import warnings
+        warnings.warn(
+            "If you want to use multi-process optimization with "
+            "`multiprocessing.get_start_method() == 'spawn'` (e.g. on Windows),"
+            "set `backtesting.Pool = multiprocessing.Pool` (or of the desired context) "
+            "and hide `bt.optimize()` call behind a `if __name__ == '__main__'` guard. "
+            "Currently using thread-based paralellism, "
+            "which might be slightly slower for non-numpy / non-GIL-releasing code. "
+            "See https://github.com/kernc/backtesting.py/issues/1256",
+            category=RuntimeWarning, stacklevel=3)
+        from multiprocessing.dummy import Pool
+        return Pool(processes, initializer, initargs)
+    else:
+        return mp.Pool(processes, initializer, initargs)
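As the warning text suggests, users on 'spawn' platforms (e.g. Windows) can opt back into process-based optimization by overriding `backtesting.Pool` and guarding the entry point. A minimal sketch of that usage (the SmaCross strategy below is the usual quick-start example, not part of this commit):

import multiprocessing as mp

import backtesting
from backtesting import Backtest, Strategy
from backtesting.lib import crossover
from backtesting.test import GOOG, SMA


class SmaCross(Strategy):
    fast, slow = 10, 30

    def init(self):
        self.sma_fast = self.I(SMA, self.data.Close, self.fast)
        self.sma_slow = self.I(SMA, self.data.Close, self.slow)

    def next(self):
        if crossover(self.sma_fast, self.sma_slow):
            self.buy()
        elif crossover(self.sma_slow, self.sma_fast):
            self.position.close()


if __name__ == '__main__':            # required when the start method is 'spawn'
    backtesting.Pool = mp.Pool        # or the Pool of a specific mp.get_context(...)
    stats = Backtest(GOOG, SmaCross, cash=10_000).optimize(
        fast=range(5, 30, 5), slow=range(10, 70, 5), maximize='Equity Final [$]')
    print(stats)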

backtesting/_plotting.py (+6 -5)

@@ -128,11 +128,11 @@ def _maybe_resample_data(resample_rule, df, indicators, equity_data, trades):
     "15min": 15,
     "30min": 30,
     "1h": 60,
-    "2h": 60*2,
-    "4h": 60*4,
-    "8h": 60*8,
-    "1D": 60*24,
-    "1W": 60*24*7,
+    "2h": 60 * 2,
+    "4h": 60 * 4,
+    "8h": 60 * 8,
+    "1D": 60 * 24,
+    "1W": 60 * 24 * 7,
     "1ME": np.inf,
 })
 timespan = df.index[-1] - df.index[0]
@@ -683,6 +683,7 @@ def __eq__(self, other):
 f.legend.margin = 0
 f.legend.label_text_font_size = '8pt'
 f.legend.click_policy = "hide"
+f.legend.background_fill_alpha = .9
 f.min_border_left = 0
 f.min_border_top = 3
 f.min_border_bottom = 6

backtesting/_stats.py (+19 -4)

@@ -137,12 +137,12 @@ def _round_timedelta(value, _period=_data_period(index)):
 # our risk doesn't; they use the simpler approach below.
 annualized_return = (1 + gmean_day_return)**annual_trading_days - 1
 s.loc['Return (Ann.) [%]'] = annualized_return * 100
-s.loc['Volatility (Ann.) [%]'] = np.sqrt((day_returns.var(ddof=int(bool(day_returns.shape))) + (1 + gmean_day_return)**2)**annual_trading_days - (1 + gmean_day_return)**(2*annual_trading_days)) * 100  # noqa: E501
+s.loc['Volatility (Ann.) [%]'] = np.sqrt((day_returns.var(ddof=int(bool(day_returns.shape))) + (1 + gmean_day_return)**2)**annual_trading_days - (1 + gmean_day_return)**(2 * annual_trading_days)) * 100  # noqa: E501
 # s.loc['Return (Ann.) [%]'] = gmean_day_return * annual_trading_days * 100
 # s.loc['Risk (Ann.) [%]'] = day_returns.std(ddof=1) * np.sqrt(annual_trading_days) * 100
 if is_datetime_index:
     time_in_years = (s.loc['Duration'].days + s.loc['Duration'].seconds / 86400) / annual_trading_days
-    s.loc['CAGR [%]'] = ((s.loc['Equity Final [$]'] / equity[0])**(1/time_in_years) - 1) * 100 if time_in_years else np.nan  # noqa: E501
+    s.loc['CAGR [%]'] = ((s.loc['Equity Final [$]'] / equity[0])**(1 / time_in_years) - 1) * 100 if time_in_years else np.nan  # noqa: E501

 # Our Sharpe mismatches `empyrical.sharpe_ratio()` because they use arithmetic mean return
 # and simple standard deviation
@@ -154,8 +154,11 @@ def _round_timedelta(value, _period=_data_period(index)):
 s.loc['Calmar Ratio'] = annualized_return / (-max_dd or np.nan)
 equity_log_returns = np.log(equity[1:] / equity[:-1])
 market_log_returns = np.log(c[1:] / c[:-1])
-cov_matrix = np.cov(equity_log_returns, market_log_returns)
-beta = cov_matrix[0, 1] / cov_matrix[1, 1]
+beta = np.nan
+if len(equity_log_returns) > 1 and len(market_log_returns) > 1:
+    # len == 0 on dummy call `stats_keys = compute_stats(...)` pre optimization
+    cov_matrix = np.cov(equity_log_returns, market_log_returns)
+    beta = cov_matrix[0, 1] / cov_matrix[1, 1]
 # Jensen CAPM Alpha: can be strongly positive when beta is negative and B&H Return is large
 s.loc['Alpha [%]'] = s.loc['Return [%]'] - risk_free_rate * 100 - beta * (s.loc['Buy & Hold Return [%]'] - risk_free_rate * 100)  # noqa: E501
 s.loc['Beta'] = beta
@@ -194,3 +197,15 @@ def __repr__(self):
 # 'format.na_rep', '--',  # TODO: Enable once it works
 ):
     return super().__repr__()
+
+
+def dummy_stats():
+    from .backtesting import Trade, _Broker
+    index = pd.DatetimeIndex(['2025'])
+    data = pd.DataFrame({col: [np.nan] for col in ('Close',)}, index=index)
+    trade = Trade(_Broker(data=data, cash=10000, spread=.01, commission=.01, margin=.1,
+                          trade_on_close=True, hedging=True, exclusive_orders=False, index=index),
+                  1, 1, 0, None)
+    trade._replace(exit_price=1, exit_bar=0)
+    trade._commissions = np.nan
+    return compute_stats([trade], np.r_[[np.nan]], data, None, 0)
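For context, the guarded block above computes CAPM beta as the covariance of strategy and market daily log returns over the market variance; `np.cov` needs at least two observations, which the dummy pre-optimization call doesn't have. A standalone sketch with toy numbers (illustrative values only, not from this commit):

import numpy as np

equity_log_returns = np.array([0.010, -0.020, 0.015, 0.005])   # toy strategy returns
market_log_returns = np.array([0.012, -0.018, 0.010, 0.004])   # toy market returns

beta = np.nan
if len(equity_log_returns) > 1 and len(market_log_returns) > 1:
    cov_matrix = np.cov(equity_log_returns, market_log_returns)
    beta = cov_matrix[0, 1] / cov_matrix[1, 1]   # Cov(strategy, market) / Var(market)
print(beta)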

backtesting/_util.py (+9 -6)

@@ -305,8 +305,11 @@ def arr2shm(self, vals):
     """Array to shared memory. Returns (shm_name, shape, dtype) used for restore."""
     assert vals.ndim == 1, (vals.ndim, vals.shape, vals)
     shm = self.SharedMemory(size=vals.nbytes, create=True)
-    buf = np.ndarray(vals.shape, dtype=vals.dtype, buffer=shm.buf)
-    buf[:] = vals[:]  # Copy into shared memory
+    # np.array can't handle pandas' tz-aware datetimes
+    # https://github.com/numpy/numpy/issues/18279
+    buf = np.ndarray(vals.shape, dtype=vals.dtype.base, buffer=shm.buf)
+    has_tz = getattr(vals.dtype, 'tz', None)
+    buf[:] = vals.tz_localize(None) if has_tz else vals  # Copy into shared memory
     return shm.name, vals.shape, vals.dtype

 def df2shm(self, df):
@@ -316,18 +319,18 @@ def df2shm(self, df):
 ))

 @staticmethod
-def shm2arr(shm, shape, dtype):
-    arr = np.ndarray(shape, dtype=dtype, buffer=shm.buf)
+def shm2s(shm, shape, dtype) -> pd.Series:
+    arr = np.ndarray(shape, dtype=dtype.base, buffer=shm.buf)
     arr.setflags(write=False)
-    return arr
+    return pd.Series(arr, dtype=dtype)

 _DF_INDEX_COL = '__bt_index'

 @staticmethod
 def shm2df(data_shm):
     shm = [SharedMemory(name=name, create=False, track=False) for _, name, _, _ in data_shm]
     df = pd.DataFrame({
-        col: SharedMemoryManager.shm2arr(shm, shape, dtype)
+        col: SharedMemoryManager.shm2s(shm, shape, dtype)
         for shm, (col, _, shape, dtype) in zip(shm, data_shm)})
     df.set_index(SharedMemoryManager._DF_INDEX_COL, drop=True, inplace=True)
     df.index.name = None
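The two changes above cooperate: `arr2shm` writes tz-aware values into shared memory as their naive base dtype, and `shm2s` re-applies the original tz-aware dtype on the way back out. A minimal sketch of that round trip without shared memory (not from this commit; assumes recent pandas, where naive values given a tz-aware dtype are treated as wall times):

import numpy as np
import pandas as pd

s = pd.Series(pd.date_range('2025-01-01', periods=3, tz='Asia/Kolkata'))
dtype = s.dtype                              # DatetimeTZDtype; dtype.base is datetime64[ns]

buf = np.ndarray(s.shape, dtype=dtype.base)  # what arr2shm() allocates in the shared buffer
buf[:] = s.dt.tz_localize(None)              # store naive wall times
restored = pd.Series(buf, dtype=dtype)       # what shm2s() does: re-attach the timezone

print(restored.equals(s))                    # expected: True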

backtesting/backtesting.py (+11 -12)

@@ -8,7 +8,6 @@

 from __future__ import annotations

-import multiprocessing as mp
 import sys
 import warnings
 from abc import ABCMeta, abstractmethod
@@ -24,7 +23,7 @@
 from numpy.random import default_rng

 from ._plotting import plot  # noqa: I001
-from ._stats import compute_stats
+from ._stats import compute_stats, dummy_stats
 from ._util import (
     SharedMemoryManager, _as_str, _Indicator, _Data, _batch, _indicator_warmup_nbars,
     _strategy_indicators, patch, try_, _tqdm,
@@ -211,7 +210,7 @@ def next(self):
 """

 class __FULL_EQUITY(float):  # noqa: N801
-    def __repr__(self): return '.9999'
+    def __repr__(self): return '.9999'  # noqa: E704
 _FULL_EQUITY = __FULL_EQUITY(1 - sys.float_info.epsilon)

 def buy(self, *,
@@ -449,7 +448,7 @@ def __repr__(self):
 ('tp', self.__tp_price),
 ('contingent', self.is_contingent),
 ('tag', self.__tag),
-) if value is not None))
+) if value is not None))  # noqa: E126

 def cancel(self):
     """Cancel the order."""
@@ -578,7 +577,7 @@ def __init__(self, broker: '_Broker', size: int, entry_price: float, entry_bar,
 def __repr__(self):
     return f'<Trade size={self.__size} time={self.__entry_bar}-{self.__exit_bar or ""} ' \
            f'price={self.__entry_price}-{self.__exit_price or ""} pl={self.pl:.0f}' \
-           f'{" tag="+str(self.__tag) if self.__tag is not None else ""}>'
+           f'{" tag=" + str(self.__tag) if self.__tag is not None else ""}>'

 def _replace(self, **kwargs):
     for k, v in kwargs.items():
@@ -1309,7 +1308,8 @@ def run(self, **kwargs) -> pd.Series:
 # np.nan >= 3 is not invalid; it's False.
 with np.errstate(invalid='ignore'):

-    for i in _tqdm(range(start, len(self._data)), desc=self.run.__qualname__):
+    for i in _tqdm(range(start, len(self._data)), desc=self.run.__qualname__,
+                   unit='bar', mininterval=2, miniters=100):
         # Prepare data and indicators for `next` call
         data._set_length(i + 1)
         for attr, indicator in indicator_attrs:
@@ -1425,9 +1425,7 @@ def optimize(self, *,
 maximize_key = None
 if isinstance(maximize, str):
     maximize_key = str(maximize)
-    stats_keys = compute_stats(
-        [], np.r_[[np.nan]], pd.DataFrame({col: [np.nan] for col in ('Close',)}), None, 0).index
-    if maximize not in stats_keys:
+    if maximize not in dummy_stats().index:
         raise ValueError('`maximize`, if str, must match a key in pd.Series '
                          'result of backtest.run()')

@@ -1503,9 +1501,9 @@ def _optimize_grid() -> Union[pd.Series, Tuple[pd.Series, pd.Series]]:
 [p.values() for p in param_combos],
 names=next(iter(param_combos)).keys()))

-with mp.Pool() as pool, \
+from . import Pool
+with Pool() as pool, \
         SharedMemoryManager() as smm:
-
     with patch(self, '_data', None):
         bt = copy(self)  # bt._data will be reassigned in _mp_task worker
         results = _tqdm(
@@ -1567,7 +1565,8 @@ def memoized_run(tup):
 stats = self.run(**dict(tup))
 return -maximize(stats)

-progress = iter(_tqdm(repeat(None), total=max_tries, leave=False, desc='Backtest.optimize'))
+progress = iter(_tqdm(repeat(None), total=max_tries, leave=False,
+                      desc=self.optimize.__qualname__, mininterval=2))
 _names = tuple(kwargs.keys())

 def objective_function(x):
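A note on the `from . import Pool` placement inside `_optimize_grid`: resolving `Pool` from the package at call time, rather than binding `mp.Pool` at module import, is presumably what lets the `backtesting.Pool = ...` override from `__init__.py` take effect. A minimal sketch of the pattern (not from this commit):

import multiprocessing.dummy

import backtesting


def run_parallel():
    from backtesting import Pool  # late lookup: honours a user-assigned backtesting.Pool
    with Pool() as pool:
        return pool.map(abs, range(-4, 0))


print(run_parallel())                            # default backtesting.Pool
backtesting.Pool = multiprocessing.dummy.Pool    # user override (thread-based pool)
print(run_parallel())                            # now uses the override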

backtesting/lib.py (+6 -5)

@@ -13,7 +13,6 @@

 from __future__ import annotations

-import multiprocessing as mp
 import warnings
 from collections import OrderedDict
 from inspect import currentframe
@@ -497,7 +496,7 @@ def set_trailing_pct(self, pct: float = .05):
 def next(self):
     super().next()
     # Can't use index=-1 because self.__atr is not an Indicator type
-    index = len(self.data)-1
+    index = len(self.data) - 1
     for trade in self.trades:
         if trade.is_long:
             trade.sl = max(trade.sl or -np.inf,
@@ -587,15 +586,17 @@ def run(self, **kwargs):
 Wraps `backtesting.backtesting.Backtest.run`. Returns `pd.DataFrame` with
 currency indexes in columns.
 """
-with mp.Pool() as pool, \
+from . import Pool
+with Pool() as pool, \
         SharedMemoryManager() as smm:
     shm = [smm.df2shm(df) for df in self._dfs]
     results = _tqdm(
         pool.imap(self._mp_task_run,
                   ((df_batch, self._strategy, self._bt_kwargs, kwargs)
                    for df_batch in _batch(shm))),
         total=len(shm),
-        desc=self.__class__.__name__,
+        desc=self.run.__qualname__,
+        mininterval=2
     )
 df = pd.DataFrame(list(chain(*results))).transpose()
 return df
@@ -623,7 +624,7 @@ def optimize(self, **kwargs) -> pd.DataFrame:
 """
 heatmaps = []
 # Simple loop since bt.optimize already does its own multiprocessing
-for df in _tqdm(self._dfs, desc=self.__class__.__name__):
+for df in _tqdm(self._dfs, desc=self.__class__.__name__, mininterval=2):
     bt = Backtest(df, self._strategy, **self._bt_kwargs)
     _best_stats, heatmap = bt.optimize(  # type: ignore
         return_heatmap=True, return_optimization=False, **kwargs)

backtesting/test/_test.py (+25 -11)

@@ -1,4 +1,5 @@
 import inspect
+import multiprocessing as mp
 import os
 import sys
 import time
@@ -287,7 +288,7 @@ def test_compute_drawdown(self):
 def test_compute_stats(self):
     stats = Backtest(GOOG, SmaCross, finalize_trades=True).run()
     expected = pd.Series({
-        # NOTE: These values are also used on the website!
+        # NOTE: These values are also used on the website!  # noqa: E126
         '# Trades': 66,
         'Avg. Drawdown Duration': pd.Timedelta('41 days 00:00:00'),
         'Avg. Drawdown [%]': -5.925851581948801,
@@ -629,7 +630,8 @@ def test_optimize_speed(self):
 bt.optimize(fast=range(2, 20, 2), slow=range(10, 40, 2))
 end = time.process_time()
 print(end - start)
-self.assertLess(end - start, .3)
+handicap = 5 if 'win' in sys.platform else .1
+self.assertLess(end - start, .3 + handicap)


 class TestPlot(TestCase):
@@ -932,7 +934,7 @@ def next(self):
 self.assertEqual(stats['# Trades'], 56)

 def test_FractionalBacktest(self):
-    ubtc_bt = FractionalBacktest(BTCUSD['2015':], SmaCross, fractional_unit=1/1e6, cash=100)
+    ubtc_bt = FractionalBacktest(BTCUSD['2015':], SmaCross, fractional_unit=1 / 1e6, cash=100)
     stats = ubtc_bt.run(fast=2, slow=3)
     self.assertEqual(stats['# Trades'], 41)
     trades = stats['_trades']
@@ -942,13 +944,20 @@
 self.assertAlmostEqual(stats['_strategy']._indicators[0][trade['EntryBar']], 234.14)

 def test_MultiBacktest(self):
-    btm = MultiBacktest([GOOG, EURUSD, BTCUSD], SmaCross, cash=100_000)
-    res = btm.run(fast=2)
-    self.assertIsInstance(res, pd.DataFrame)
-    self.assertEqual(res.columns.tolist(), [0, 1, 2])
-    heatmap = btm.optimize(fast=[2, 4], slow=[10, 20])
-    self.assertIsInstance(heatmap, pd.DataFrame)
-    self.assertEqual(heatmap.columns.tolist(), [0, 1, 2])
+    import backtesting
+    assert callable(getattr(backtesting, 'Pool', None)), backtesting.__dict__
+    for start_method in mp.get_all_start_methods():
+        with self.subTest(start_method=start_method), \
+                patch(backtesting, 'Pool', mp.get_context(start_method).Pool):
+            start_time = time.monotonic()
+            btm = MultiBacktest([GOOG, EURUSD, BTCUSD], SmaCross, cash=100_000)
+            res = btm.run(fast=2)
+            self.assertIsInstance(res, pd.DataFrame)
+            self.assertEqual(res.columns.tolist(), [0, 1, 2])
+            heatmap = btm.optimize(fast=[2, 4], slow=[10, 20])
+            self.assertIsInstance(heatmap, pd.DataFrame)
+            self.assertEqual(heatmap.columns.tolist(), [0, 1, 2])
+            print(start_method, time.monotonic() - start_time)
     plot_heatmaps(heatmap.mean(axis=1), open_browser=False)


@@ -1009,7 +1018,6 @@ def test_indicators_picklable(self):
 class TestDocs(TestCase):
     DOCS_DIR = os.path.join(os.path.dirname(__file__), '..', '..', 'doc')

-    @unittest.skipIf('win' in sys.platform, "Locks up with `ModuleNotFoundError: No module named '<run_path>'`")
     @unittest.skipUnless(os.path.isdir(DOCS_DIR), "docs dir doesn't exist")
     def test_examples(self):
         examples = glob(os.path.join(self.DOCS_DIR, 'examples', '*.py'))
@@ -1127,3 +1135,9 @@ def next(self):
 trades = Backtest(SHORT_DATA, S).run()._trades
 self.assertEqual(trades['ExitBar'].iloc[0], 3)
 self.assertEqual(trades['ExitPrice'].iloc[0], 105)
+
+def test_optimize_datetime_index_with_timezone(self):
+    data: pd.DataFrame = GOOG.iloc[:100]
+    data.index = data.index.tz_localize('Asia/Kolkata')
+    res = Backtest(data, SmaCross).optimize(fast=range(2, 3), slow=range(4, 5))
+    self.assertGreater(res['# Trades'], 0)
