Skip to content

Commit 4c93023

Browse files
authored
Add linear regression stats of log-equity curve (#21)
* add linear regression stats of log-equity curve
1 parent cf0cbf6 commit 4c93023

File tree

5 files changed

+247
-106
lines changed

5 files changed

+247
-106
lines changed

examples/20241227_introduction.ipynb

Lines changed: 107 additions & 99 deletions
Large diffs are not rendered by default.

kissbt/analyzer.py

Lines changed: 76 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import numpy as np
44
import pandas as pd
5+
from scipy.stats import linregress
56

67
from kissbt.broker import Broker
78

@@ -68,6 +69,52 @@ def __init__(
6869
self.analysis_df["benchmark"].cummax() - self.analysis_df["benchmark"]
6970
) / self.analysis_df["benchmark"].cummax()
7071

72+
def _equity_curve_stats(
73+
self,
74+
value_series: pd.Series,
75+
*,
76+
prefix: str = "",
77+
) -> Dict[str, float]:
78+
"""
79+
Calculate statistics of the equity curve based on the log-equity curve.
80+
This method performs a linear regression on the log-equity curve to estimate
81+
the slope, standard error, t-statistic, and R² value.
82+
83+
- slope: The slope of the log-equity curve, indicating the average return per
84+
bar.
85+
- slope_se: The standard error of the slope, indicating the variability of the
86+
average return.
87+
- slope_tstat: The t-statistic of the slope, indicating how strongly the data
88+
supports the presence of a non-zero trend in the log-equity curve.
89+
- r_squared: The R² value of the regression, indicating the proportion of
90+
variance explained.
91+
92+
Parameters:
93+
value_series (pd.Series): The series of values to analyze, typically the
94+
total value of the portfolio or benchmark.
95+
prefix (str): A prefix to add to the keys in the returned dictionary, useful
96+
for distinguishing between portfolio and benchmark statistics.
97+
"""
98+
99+
if (value_series <= 0).any():
100+
raise ValueError(
101+
"Value series contains non-positive values, cannot compute log-based statistics" # noqa: E501
102+
)
103+
y = np.log(value_series.to_numpy())
104+
x = np.arange(y.size, dtype=float)
105+
106+
res = linregress(x, y)
107+
slope, slope_se, r_squared = res.slope, res.stderr, res.rvalue**2
108+
109+
slope_tstat = slope / slope_se
110+
111+
return {
112+
f"{prefix}slope": slope,
113+
f"{prefix}slope_se": slope_se,
114+
f"{prefix}slope_tstat": slope_tstat,
115+
f"{prefix}r_squared": r_squared,
116+
}
117+
71118
def get_performance_metrics(self) -> Dict[str, float]:
72119
"""
73120
Calculate and return key performance metrics of the trading strategy.
@@ -85,9 +132,26 @@ def get_performance_metrics(self) -> Dict[str, float]:
85132
- profit_factor: The profit factor of the trading strategy, a ratio of gross
86133
profits to gross losses.
87134
88-
If a benchmark is available in the data, the dictionary also includes:
89-
- total_benchmark_return: The total return of the benchmark as a decimal.
90-
- annual_benchmark_return: The annualized return of the benchmark as a decimal.
135+
Additionally we compute the equity curve statistics for the portfolio's
136+
total value, including:
137+
- slope: The slope of the log-equity curve, indicating the average return per
138+
bar.
139+
- slope_se: The standard error of the slope, indicating the variability of the
140+
average return.
141+
- slope_tstat: The t-statistic of the slope (slope / slope_se), indicating how
142+
strongly the data supports the presence of a non-zero trend in the
143+
log-equity curve. A larger absolute value (positive or negative) provides
144+
stronger evidence against H_0 (β = 0), suggesting that the observed trend is
145+
unlikely to be due to random fluctuations. For typical backtests the
146+
t-statistic approximately follows a standard normal distribution. Values
147+
above +1.96 or below -1.96 are considered statistically significant at the
148+
95% confidence level.
149+
- r_squared: The R² value of the regression, indicating the proportion of
150+
variance explained by the model.
151+
152+
If a benchmark is available in the data, the dictionary also includes the
153+
total_return, annual_return, slope, slope_se, slope_tstat and r_squared for the
154+
benchmark, prefixed with "benchmark_".
91155
92156
Returns:
93157
Dict[str, float]: A dictionary containing the calculated performance
@@ -103,14 +167,21 @@ def get_performance_metrics(self) -> Dict[str, float]:
103167
"win_rate": self._calculate_win_rate(),
104168
"profit_factor": self._calculate_profit_factor(),
105169
}
170+
metrics.update(self._equity_curve_stats(self.analysis_df["total_value"]))
106171

107172
if "benchmark" in self.analysis_df.columns:
108-
metrics["total_benchmark_return"] = self._calculate_total_return(
173+
metrics["benchmark_total_return"] = self._calculate_total_return(
109174
"benchmark"
110175
)
111-
metrics["annual_benchmark_return"] = self._calculate_annual_return(
176+
metrics["benchmark_annual_return"] = self._calculate_annual_return(
112177
"benchmark"
113178
)
179+
metrics.update(
180+
self._equity_curve_stats(
181+
self.analysis_df["benchmark"],
182+
prefix="benchmark_",
183+
)
184+
)
114185

115186
return metrics
116187

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ classifiers = [
2222
dependencies = [
2323
"numpy",
2424
"pandas",
25+
"scipy",
2526
"matplotlib"
2627
]
2728
requires-python = ">=3.10"

tests/test_analyzer.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
import numpy as np
2+
import pandas as pd
3+
import pytest
4+
from kissbt.analyzer import Analyzer
5+
from kissbt.broker import Broker
6+
7+
8+
def test_constant_growth_benchmark_stats():
    """
    Check the benchmark regression statistics for a benchmark that compounds at a
    fixed daily rate while the portfolio value is held constant.

    The benchmark's log-equity curve is then a straight line, so the fitted slope
    must equal log(1 + daily_return) with a negligible standard error.
    """
    daily_return = 0.0001
    num_days = 252
    start_value = 100000.0
    benchmark_path = [
        start_value * (1 + daily_return) ** day for day in range(num_days)
    ]

    broker = Broker(benchmark="constant_growth")
    first_day = pd.Timestamp("2023-01-01")
    for day, benchmark_value in enumerate(benchmark_path):
        # One history row per bar; keys are appended in the same order as before.
        row = {
            "timestamp": first_day + pd.Timedelta(days=day),
            "total_value": start_value,
            "benchmark": benchmark_value,
            "cash": 0,
            "long_position_value": 0,
            "short_position_value": 0,
            "positions": {},
        }
        for column, entry in row.items():
            broker.history[column].append(entry)

    metrics = Analyzer(broker).get_performance_metrics()

    expected_slope = np.log(1 + daily_return)
    assert abs(metrics["benchmark_slope"] - expected_slope) < 1e-10
    assert metrics["benchmark_slope_se"] < 1e-10
    assert metrics["benchmark_slope_tstat"] > 1e5
    assert metrics["benchmark_r_squared"] > 0.9999
32+
33+
34+
def test_portfolio_equity_curve_stats_with_volatility():
    """
    Check the portfolio regression statistics for a noisy, upward-drifting equity
    curve built from seeded normally distributed daily returns.
    """
    np.random.seed(42)
    num_days = 256 * 3
    start_value = 100000.0

    # Compound the random daily returns into an equity path, start value first.
    daily_returns = np.random.normal(0.001, 0.02, num_days)
    running_value = start_value
    portfolio_values = [running_value]
    for daily_return in daily_returns:
        running_value = running_value * (1 + daily_return)
        portfolio_values.append(running_value)

    broker = Broker()
    first_day = pd.Timestamp("2023-01-01")
    for day, equity in enumerate(portfolio_values):
        # Fully invested long book: all equity sits in the long position value.
        row = {
            "timestamp": first_day + pd.Timedelta(days=day),
            "total_value": equity,
            "cash": 0,
            "long_position_value": equity,
            "short_position_value": 0,
            "positions": {},
        }
        for column, entry in row.items():
            broker.history[column].append(entry)

    metrics = Analyzer(broker).get_performance_metrics()

    assert metrics["slope"] == pytest.approx(0.001, abs=0.0005)
    assert metrics["slope_se"] > 0
    assert metrics["slope_tstat"] > 1.96
    assert 0.5 < metrics["r_squared"] < 0.9

tests/test_integration.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,8 @@ def test_analyzer_with_golden_cross(tech_stock_data):
9898
assert pytest.approx(metrics["volatility"], abs=0.01) == 0.24
9999
assert pytest.approx(metrics["win_rate"], abs=0.01) == 0.47
100100
assert pytest.approx(metrics["profit_factor"], abs=0.01) == 3.17
101-
assert pytest.approx(metrics["total_benchmark_return"], abs=0.01) == 0.29
102-
assert pytest.approx(metrics["annual_benchmark_return"], abs=0.01) == 0.09
101+
assert pytest.approx(metrics["benchmark_total_return"], abs=0.01) == 0.29
102+
assert pytest.approx(metrics["benchmark_annual_return"], abs=0.01) == 0.09
103103

104104
# Ensure running the plot functions does not raise an exception
105105
analyzer.plot_equity_curve()

0 commit comments

Comments
 (0)