mirror of
https://github.com/wshobson/agents.git
synced 2026-03-18 17:47:16 +00:00
664 lines
21 KiB
Markdown
664 lines
21 KiB
Markdown
---
|
|
name: backtesting-frameworks
|
|
description: Build robust backtesting systems for trading strategies with proper handling of look-ahead bias, survivorship bias, and transaction costs. Use when developing trading algorithms, validating strategies, or building backtesting infrastructure.
|
|
---
|
|
|
|
# Backtesting Frameworks
|
|
|
|
Build robust, production-grade backtesting systems that avoid common pitfalls and produce reliable strategy performance estimates.
|
|
|
|
## When to Use This Skill
|
|
|
|
- Developing trading strategy backtests
|
|
- Building backtesting infrastructure
|
|
- Validating strategy performance
|
|
- Avoiding common backtesting biases
|
|
- Implementing walk-forward analysis
|
|
- Comparing strategy alternatives
|
|
|
|
## Core Concepts
|
|
|
|
### 1. Backtesting Biases
|
|
|
|
| Bias             | Description               | Mitigation              |
| ---------------- | ------------------------- | ----------------------- |
| **Look-ahead**   | Using future information  | Point-in-time data      |
| **Survivorship** | Only testing on survivors | Use delisted securities |
| **Overfitting**  | Curve-fitting to history  | Out-of-sample testing   |
| **Selection**    | Cherry-picking strategies | Pre-registration        |
| **Transaction**  | Ignoring trading costs    | Realistic cost models   |
|
|
|
|
### 2. Proper Backtest Structure
|
|
|
|
```
|
|
Historical Data
|
|
│
|
|
▼
|
|
┌─────────────────────────────────────────┐
|
|
│ Training Set │
|
|
│ (Strategy Development & Optimization) │
|
|
└─────────────────────────────────────────┘
|
|
│
|
|
▼
|
|
┌─────────────────────────────────────────┐
|
|
│ Validation Set │
|
|
│ (Parameter Selection, No Peeking) │
|
|
└─────────────────────────────────────────┘
|
|
│
|
|
▼
|
|
┌─────────────────────────────────────────┐
|
|
│ Test Set │
|
|
│ (Final Performance Evaluation) │
|
|
└─────────────────────────────────────────┘
|
|
```
|
|
|
|
### 3. Walk-Forward Analysis
|
|
|
|
```
|
|
Window 1: [Train──────][Test]
Window 2:     [Train──────][Test]
Window 3:         [Train──────][Test]
Window 4:             [Train──────][Test]
                                     ─────▶ Time
|
|
```
|
|
|
|
## Implementation Patterns
|
|
|
|
### Pattern 1: Event-Driven Backtester
|
|
|
|
```python
|
|
from abc import ABC, abstractmethod
|
|
from dataclasses import dataclass, field
|
|
from datetime import datetime
|
|
from decimal import Decimal
|
|
from enum import Enum
|
|
from typing import Dict, List, Optional
|
|
import pandas as pd
|
|
import numpy as np
|
|
|
|
class OrderSide(Enum):
    """Direction of an order."""

    # Members carry lowercase string values.
    BUY = "buy"
    SELL = "sell"
|
|
|
|
class OrderType(Enum):
    """How an order should be executed."""

    # Executed by SimpleExecutionModel at the bar's open price.
    MARKET = "market"
    # Declared for completeness; SimpleExecutionModel does not fill these.
    LIMIT = "limit"
    STOP = "stop"
|
|
|
|
@dataclass
class Order:
    """Request to trade ``quantity`` units of ``symbol`` on a given side."""

    # Instrument identifier; Portfolio keys its positions by this value.
    symbol: str
    # BUY or SELL.
    side: OrderSide
    # Number of units to trade.
    quantity: Decimal
    # MARKET, LIMIT or STOP.
    order_type: OrderType
    # Price fields for LIMIT / STOP orders; presumably unused for MARKET
    # orders -- confirm against the execution model in use.
    limit_price: Optional[Decimal] = None
    stop_price: Optional[Decimal] = None
    # Optional creation time of the order.
    timestamp: Optional[datetime] = None
|
|
|
|
@dataclass
class Fill:
    """Record of an executed order as produced by an execution model."""

    # The order that was executed.
    order: Order
    # Price per unit actually obtained.
    fill_price: Decimal
    # Number of units executed.
    fill_quantity: Decimal
    # Commission charged for this fill (deducted from cash by Portfolio).
    commission: Decimal
    # Slippage amount reported by the execution model; informational here,
    # since Portfolio.process_fill does not read it.
    slippage: Decimal
    # Time of execution.
    timestamp: datetime
|
|
|
|
@dataclass
class Position:
    """Running position in one symbol using average-cost accounting.

    ``quantity`` is signed: positive for a long position, negative for a
    short position. ``avg_cost`` is the average entry price of the units
    currently held and ``realized_pnl`` accumulates profit and loss on
    units that have been closed.
    """

    symbol: str
    quantity: Decimal = Decimal("0")
    avg_cost: Decimal = Decimal("0")
    realized_pnl: Decimal = Decimal("0")

    def update(self, fill: Fill) -> None:
        """Apply ``fill`` to the position.

        A fill in the direction of the current position (or from flat)
        extends it and re-averages ``avg_cost``. A fill in the opposite
        direction closes units and realizes P&L on the closed units only;
        any remainder opens a new position in the other direction at the
        fill price.

        Args:
            fill: Executed order; its side, quantity and price are used.
        """
        if fill.order.side == OrderSide.BUY:
            signed_fill = fill.fill_quantity
        else:
            signed_fill = -fill.fill_quantity

        if signed_fill == 0:
            return

        extends_position = self.quantity == 0 or (
            (self.quantity > 0) == (signed_fill > 0)
        )
        if extends_position:
            # Weighted average of existing and new units. Both terms share
            # the sign of new_quantity, so the ratio is a positive price.
            new_quantity = self.quantity + signed_fill
            self.avg_cost = (
                self.quantity * self.avg_cost + signed_fill * fill.fill_price
            ) / new_quantity
            self.quantity = new_quantity
            return

        # Opposite direction: only the overlapping units are closed.
        direction = 1 if self.quantity > 0 else -1
        closed_units = min(abs(signed_fill), abs(self.quantity))
        self.realized_pnl += (
            direction * closed_units * (fill.fill_price - self.avg_cost)
        )
        self.quantity += signed_fill

        if self.quantity == 0:
            # Flat: no remaining units, so there is no cost basis.
            self.avg_cost = Decimal("0")
        elif (self.quantity > 0) != (direction > 0):
            # The fill crossed through zero; the remainder was opened at
            # the fill price.
            self.avg_cost = fill.fill_price
|
|
|
|
@dataclass
class Portfolio:
    """Cash balance plus one Position per traded symbol."""

    cash: Decimal
    positions: Dict[str, Position] = field(default_factory=dict)

    def get_position(self, symbol: str) -> Position:
        """Return the position for ``symbol``, creating an empty one if absent."""
        try:
            return self.positions[symbol]
        except KeyError:
            created = Position(symbol=symbol)
            self.positions[symbol] = created
            return created

    def process_fill(self, fill: Fill) -> None:
        """Update the affected position and adjust cash for ``fill``.

        Buys reduce cash by notional plus commission; sells increase cash
        by notional minus commission.
        """
        self.get_position(fill.order.symbol).update(fill)

        notional = fill.fill_price * fill.fill_quantity
        if fill.order.side == OrderSide.BUY:
            self.cash -= notional + fill.commission
        else:
            self.cash += notional - fill.commission

    def get_equity(self, prices: Dict[str, Decimal]) -> Decimal:
        """Return cash plus the market value of open positions.

        Positions whose symbol is missing from ``prices`` contribute
        nothing to the total.
        """
        return sum(
            (
                held.quantity * prices[name]
                for name, held in self.positions.items()
                if held.quantity != 0 and name in prices
            ),
            self.cash,
        )
|
|
|
|
class Strategy(ABC):
    """Interface a trading strategy implements to be driven by a backtester."""

    @abstractmethod
    def on_bar(self, timestamp: datetime, data: pd.DataFrame) -> List[Order]:
        """Return the orders to submit after observing a new bar.

        Args:
            timestamp: Index label of the current bar.
            data: Price history made available to the strategy.

        Returns:
            Orders to queue; may be empty.
        """

    @abstractmethod
    def on_fill(self, fill: Fill) -> None:
        """Receive notification that one of the strategy's orders was filled."""
|
|
|
|
class ExecutionModel(ABC):
    """Interface for turning an order and a price bar into a fill."""

    @abstractmethod
    def execute(self, order: Order, bar: pd.Series) -> Optional[Fill]:
        """Try to execute ``order`` against ``bar``.

        Returns:
            The resulting fill, or ``None`` when the order is not executed.
        """
|
|
|
|
class SimpleExecutionModel(ExecutionModel):
    """Fills market orders at the bar open with proportional slippage.

    Args:
        slippage_bps: Adverse price move applied to every fill, in basis
            points of the open price.
        commission_per_share: Commission charged per unit traded.
    """

    def __init__(self, slippage_bps: float = 10, commission_per_share: float = 0.01):
        self.slippage_bps = slippage_bps
        self.commission_per_share = commission_per_share

    def execute(self, order: Order, bar: pd.Series) -> Optional[Fill]:
        """Fill a MARKET order in full at the slipped open price.

        Buys pay ``open * (1 + bps/10000)``; sells receive
        ``open / (1 + bps/10000)``. Any other order type returns ``None``.
        """
        if order.order_type != OrderType.MARKET:
            return None

        open_price = Decimal(str(bar["open"]))
        # Go through str() so the Decimal matches the printed float value.
        slip_factor = Decimal(str(1 + (self.slippage_bps / 10000)))

        if order.side == OrderSide.BUY:
            executed_price = open_price * slip_factor
        else:
            executed_price = open_price / slip_factor

        per_share_fee = Decimal(str(self.commission_per_share))
        return Fill(
            order=order,
            fill_price=executed_price,
            fill_quantity=order.quantity,
            commission=order.quantity * per_share_fee,
            slippage=abs(executed_price - open_price) * order.quantity,
            timestamp=bar.name,
        )
|
|
|
|
class Backtester:
    """Event-driven, single-instrument backtester.

    Orders produced by the strategy on one bar are handed to the execution
    model on the following bar, so a strategy never trades on the bar it
    has just observed.

    Args:
        strategy: Produces orders and receives fill notifications.
        execution_model: Converts orders into fills against a bar.
        initial_capital: Starting cash balance.
    """

    def __init__(
        self,
        strategy: Strategy,
        execution_model: ExecutionModel,
        initial_capital: Decimal = Decimal("100000")
    ):
        self.strategy = strategy
        self.execution_model = execution_model
        self.portfolio = Portfolio(cash=initial_capital)
        self.equity_curve: List[tuple] = []
        self.trades: List[Fill] = []

    def run(self, data: pd.DataFrame) -> pd.DataFrame:
        """Run backtest on OHLCV data with DatetimeIndex.

        Args:
            data: One row per bar; the ``close`` column is read here and
                each row is passed to the execution model.

        Returns:
            DataFrame indexed by timestamp with ``equity`` and ``returns``
            columns.
        """
        pending_orders: List[Order] = []

        for timestamp, bar in data.iterrows():
            # Orders queued on the previous bar execute against this bar.
            for order in pending_orders:
                fill = self.execution_model.execute(order, bar)
                if fill is not None:
                    self.portfolio.process_fill(fill)
                    self.strategy.on_fill(fill)
                    self.trades.append(fill)

            # Orders the execution model did not fill are discarded.
            pending_orders.clear()

            # ``data`` holds bars for a single instrument, so every open
            # position is valued at this bar's close. The price map must be
            # keyed by position symbol; otherwise get_equity() finds no
            # price and reports cash only.
            close_price = Decimal(str(bar["close"]))
            prices = {symbol: close_price for symbol in self.portfolio.positions}
            equity = self.portfolio.get_equity(prices)
            self.equity_curve.append((timestamp, float(equity)))

            # The strategy sees history up to and including this bar; its
            # orders wait for the next bar.
            new_orders = self.strategy.on_bar(timestamp, data.loc[:timestamp])
            pending_orders.extend(new_orders)

        return self._create_results()

    def _create_results(self) -> pd.DataFrame:
        """Build the equity DataFrame with per-bar percentage returns."""
        equity_df = pd.DataFrame(
            self.equity_curve, columns=["timestamp", "equity"]
        ).set_index("timestamp")
        equity_df["returns"] = equity_df["equity"].pct_change()
        return equity_df
|
|
```
|
|
|
|
### Pattern 2: Vectorized Backtester (Fast)
|
|
|
|
```python
|
|
import pandas as pd
|
|
import numpy as np
|
|
from typing import Callable, Dict, Any
|
|
|
|
class VectorizedBacktester:
    """Fast vectorized backtester for simple strategies.

    Args:
        initial_capital: Starting equity.
        commission: Proportional cost per unit of position change.
        slippage: Proportional slippage per unit of position change.
    """

    def __init__(
        self,
        initial_capital: float = 100000,
        commission: float = 0.001,  # 0.1%
        slippage: float = 0.0005  # 0.05%
    ):
        self.initial_capital = initial_capital
        self.commission = commission
        self.slippage = slippage

    def run(
        self,
        prices: pd.DataFrame,
        signal_func: Callable[[pd.DataFrame], pd.Series]
    ) -> Dict[str, Any]:
        """
        Run backtest with signal function.

        Args:
            prices: DataFrame with 'close' column
            signal_func: Function that returns position signals (-1, 0, 1)

        Returns:
            Dictionary with ``equity``, ``returns``, ``signals`` and
            ``metrics`` entries.
        """
        # Shift by one bar so a signal computed on bar t is traded on t+1.
        signals = signal_func(prices).shift(1).fillna(0)

        # The first bar has no previous close: treat its return and its
        # position change as zero so the equity curve starts at
        # initial_capital instead of NaN.
        returns = prices["close"].pct_change().fillna(0)
        position_changes = signals.diff().abs().fillna(0)

        trading_costs = position_changes * (self.commission + self.slippage)
        strategy_returns = signals * returns - trading_costs

        equity = (1 + strategy_returns).cumprod() * self.initial_capital

        return {
            "equity": equity,
            "returns": strategy_returns,
            "signals": signals,
            "metrics": self._calculate_metrics(
                strategy_returns, equity, position_changes
            ),
        }

    def _calculate_metrics(
        self,
        returns: pd.Series,
        equity: pd.Series,
        position_changes: pd.Series = None
    ) -> Dict[str, float]:
        """Calculate performance metrics.

        Args:
            returns: Per-bar strategy returns.
            equity: Equity curve matching ``returns``.
            position_changes: Absolute per-bar position change. When given,
                ``num_trades`` counts bars on which the position changed;
                otherwise it falls back to counting non-zero-return bars.

        Returns:
            Metric name to value. Annualisation assumes 252 bars per year
            and the Sharpe ratio uses a zero risk-free rate.
        """
        if len(equity) == 0:
            # Nothing to measure; avoid indexing an empty series.
            return {
                "total_return": 0.0,
                "annual_return": 0.0,
                "annual_volatility": 0.0,
                "sharpe_ratio": 0.0,
                "max_drawdown": 0.0,
                "win_rate": 0.0,
                "num_trades": 0,
            }

        total_return = (equity.iloc[-1] / self.initial_capital) - 1
        annual_return = (1 + total_return) ** (252 / len(returns)) - 1
        annual_vol = returns.std() * np.sqrt(252)
        sharpe = annual_return / annual_vol if annual_vol > 0 else 0

        # Drawdown relative to the running peak.
        rolling_max = equity.cummax()
        drawdown = (equity - rolling_max) / rolling_max
        max_drawdown = drawdown.min()

        # NaN compares unequal to 0, so neutralise it before counting.
        nonzero = returns.fillna(0) != 0
        winning_days = (returns > 0).sum()
        total_days = nonzero.sum()
        win_rate = winning_days / total_days if total_days > 0 else 0

        if position_changes is None:
            num_trades = int(total_days)
        else:
            num_trades = int((position_changes.fillna(0) != 0).sum())

        return {
            "total_return": total_return,
            "annual_return": annual_return,
            "annual_volatility": annual_vol,
            "sharpe_ratio": sharpe,
            "max_drawdown": max_drawdown,
            "win_rate": win_rate,
            "num_trades": num_trades,
        }
|
|
|
|
# Example usage
|
|
def momentum_signal(prices: pd.DataFrame, lookback: int = 20) -> pd.Series:
    """Return 1 where the close is above its ``lookback``-bar SMA, else 0.

    Bars without a full lookback window have no SMA and therefore yield 0.
    """
    close = prices["close"]
    moving_average = close.rolling(lookback).mean()
    above_average = close.gt(moving_average)
    return above_average.astype(int)
|
|
|
|
# Run backtest
|
|
# backtester = VectorizedBacktester()
|
|
# results = backtester.run(price_data, lambda p: momentum_signal(p, 50))
|
|
```
|
|
|
|
### Pattern 3: Walk-Forward Optimization
|
|
|
|
```python
|
|
from typing import Callable, Dict, List, Tuple, Any
|
|
import pandas as pd
|
|
import numpy as np
|
|
from itertools import product
|
|
|
|
class WalkForwardOptimizer:
    """Walk-forward analysis with anchored or rolling windows."""

    def __init__(
        self,
        train_period: int,
        test_period: int,
        anchored: bool = False,
        n_splits: int = None
    ):
        """
        Args:
            train_period: Number of bars in training window
            test_period: Number of bars in test window
            anchored: If True, training always starts from beginning
            n_splits: When set, the window advances by
                ``(len(data) - train_period) // n_splits`` bars per split;
                when None it advances by ``test_period`` bars.

        Raises:
            ValueError: If ``train_period`` or ``test_period`` is not positive.
        """
        if train_period <= 0 or test_period <= 0:
            raise ValueError("train_period and test_period must be positive")
        self.train_period = train_period
        self.test_period = test_period
        self.anchored = anchored
        self.n_splits = n_splits

    def generate_splits(
        self,
        data: pd.DataFrame
    ) -> List[Tuple[pd.DataFrame, pd.DataFrame]]:
        """Generate train/test splits.

        Returns:
            List of ``(train, test)`` frames in chronological order. Only
            windows whose full test period fits in ``data`` are produced,
            so the list is empty when ``data`` is too short.
        """
        n = len(data)

        if self.n_splits:
            step = (n - self.train_period) // self.n_splits
        else:
            step = self.test_period
        # A zero or negative step would never advance the window.
        step = max(step, 1)

        last_start_exclusive = n - self.train_period - self.test_period + 1
        splits = []
        for start in range(0, last_start_exclusive, step):
            train_start = 0 if self.anchored else start
            train_end = start + self.train_period
            test_end = train_end + self.test_period
            splits.append(
                (data.iloc[train_start:train_end], data.iloc[train_end:test_end])
            )

        return splits

    def optimize(
        self,
        data: pd.DataFrame,
        strategy_func: Callable,
        param_grid: Dict[str, List],
        metric: str = "sharpe_ratio"
    ) -> Dict[str, Any]:
        """
        Run walk-forward optimization.

        Args:
            data: Full dataset
            strategy_func: Function(data, **params) -> results dict with
                ``"metrics"`` and ``"equity"`` entries
            param_grid: Parameter combinations to test
            metric: Metric to optimize (higher is better)

        Returns:
            Dict with ``split_results``, ``param_history`` and
            ``combined_equity``.

        Raises:
            ValueError: If no parameter combination yields a comparable
                value of ``metric`` on some training window.
        """
        splits = self.generate_splits(data)
        all_results = []
        optimal_params_history = []

        for i, (train_data, test_data) in enumerate(splits):
            # Optimize on training data only.
            best_params, best_metric = self._grid_search(
                train_data, strategy_func, param_grid, metric
            )
            if best_params is None:
                # Every candidate scored NaN or -inf; fail with a clear
                # message instead of unpacking None below.
                raise ValueError(
                    f"No parameter combination produced a valid {metric} "
                    f"on split {i + 1}"
                )
            optimal_params_history.append(best_params)

            # Evaluate the chosen parameters out of sample.
            test_results = strategy_func(test_data, **best_params)
            test_results["split"] = i
            test_results["params"] = best_params
            all_results.append(test_results)

            print(f"Split {i+1}/{len(splits)}: "
                  f"Best {metric}={best_metric:.4f}, params={best_params}")

        return {
            "split_results": all_results,
            "param_history": optimal_params_history,
            "combined_equity": self._combine_equity_curves(all_results)
        }

    def _grid_search(
        self,
        data: pd.DataFrame,
        strategy_func: Callable,
        param_grid: Dict[str, List],
        metric: str
    ) -> Tuple[Dict, float]:
        """Grid search for best parameters.

        Returns:
            ``(best_params, best_metric)``; ``best_params`` is ``None`` when
            no combination scored above negative infinity.
        """
        best_params = None
        best_metric = -np.inf

        param_names = list(param_grid)
        for values in product(*param_grid.values()):
            params = dict(zip(param_names, values))
            score = strategy_func(data, **params)["metrics"][metric]

            # NaN never compares greater, so NaN scores are skipped.
            if score > best_metric:
                best_metric = score
                best_params = params

        return best_params, best_metric

    def _combine_equity_curves(
        self,
        results: List[Dict]
    ) -> pd.Series:
        """Combine equity curves from all test periods.

        The per-split curves are concatenated in order without rescaling;
        an empty series is returned when there are no results.
        """
        if not results:
            return pd.Series(dtype=float)
        return pd.concat([r["equity"] for r in results])
|
|
```
|
|
|
|
### Pattern 4: Monte Carlo Analysis
|
|
|
|
```python
|
|
import numpy as np
|
|
import pandas as pd
|
|
from typing import Dict, List
|
|
|
|
class MonteCarloAnalyzer:
    """Monte Carlo simulation for strategy robustness.

    Simulated paths are built by resampling historical returns with
    replacement, which treats returns as independent draws.

    Args:
        n_simulations: Number of simulated paths.
        confidence: Confidence level in (0, 1) used for tail statistics.
        seed: Optional seed for reproducible simulations. When None the
            global ``numpy.random`` state is used.
    """

    _DEFAULT_HOLDING_PERIODS = (21, 63, 126, 252)

    def __init__(
        self,
        n_simulations: int = 1000,
        confidence: float = 0.95,
        seed: int = None
    ):
        self.n_simulations = n_simulations
        self.confidence = confidence
        # Both objects expose ``choice``; a private RandomState keeps seeded
        # runs independent of the global state.
        self._rng = np.random.RandomState(seed) if seed is not None else np.random

    @staticmethod
    def _observed(returns: pd.Series) -> np.ndarray:
        """Return the non-NaN values of ``returns`` as a float array."""
        values = np.asarray(returns, dtype=float)
        return values[~np.isnan(values)]

    def bootstrap_returns(
        self,
        returns: pd.Series,
        n_periods: int = None
    ) -> np.ndarray:
        """
        Bootstrap simulation by resampling returns.

        NaN entries (such as the first row of a ``pct_change`` series) are
        dropped before sampling so they cannot contaminate simulated paths.

        Args:
            returns: Historical returns series
            n_periods: Length of each simulation (default: number of
                non-NaN observations in ``returns``)

        Returns:
            Array of shape (n_simulations, n_periods)

        Raises:
            ValueError: If there are no non-NaN observations to sample from
                and ``n_periods`` is positive.
        """
        sample = self._observed(returns)
        if n_periods is None:
            n_periods = len(sample)

        if n_periods == 0:
            return np.zeros((self.n_simulations, 0))
        if sample.size == 0:
            raise ValueError("returns must contain at least one non-NaN value")

        # One draw fills every path at once.
        return self._rng.choice(
            sample,
            size=(self.n_simulations, n_periods),
            replace=True
        )

    def analyze_drawdowns(
        self,
        returns: pd.Series
    ) -> Dict[str, float]:
        """Analyze drawdown distribution via simulation.

        Returns:
            Mean, median, tail percentile and minimum of the per-path
            maximum drawdown. Drawdowns are negative fractions, so the
            ``worst_<N>pct`` entry is the ``(1 - confidence)`` percentile.
        """
        simulations = self.bootstrap_returns(returns)

        equity = np.cumprod(1 + simulations, axis=1)
        running_peak = np.maximum.accumulate(equity, axis=1)
        max_drawdowns = ((equity - running_peak) / running_peak).min(axis=1)

        # round() instead of int(): 0.57 * 100 is 56.99999999999999.
        level = int(round(self.confidence * 100))
        return {
            "expected_max_dd": np.mean(max_drawdowns),
            "median_max_dd": np.median(max_drawdowns),
            f"worst_{level}pct": np.percentile(
                max_drawdowns, (1 - self.confidence) * 100
            ),
            "worst_case": max_drawdowns.min()
        }

    def probability_of_loss(
        self,
        returns: pd.Series,
        holding_periods: List[int] = None
    ) -> Dict[int, float]:
        """Calculate probability of loss over various holding periods.

        Args:
            returns: Historical returns series
            holding_periods: Horizons in bars; defaults to 21, 63, 126, 252.
                Horizons longer than the available history are skipped.

        Returns:
            Mapping of horizon to the fraction of simulated paths that end
            with a negative compounded return.
        """
        if holding_periods is None:
            holding_periods = self._DEFAULT_HOLDING_PERIODS

        n_observed = len(self._observed(returns))
        results = {}

        for period in holding_periods:
            if period > n_observed:
                continue

            simulations = self.bootstrap_returns(returns, period)
            total_returns = (1 + simulations).prod(axis=1) - 1
            results[period] = (total_returns < 0).mean()

        return results

    def confidence_interval(
        self,
        returns: pd.Series,
        periods: int = 252
    ) -> Dict[str, float]:
        """Calculate confidence interval for future returns.

        Args:
            returns: Historical returns series
            periods: Horizon in bars over which returns are compounded

        Returns:
            Mean, two-sided percentile bounds at ``confidence`` and standard
            deviation of the simulated compounded return.
        """
        simulations = self.bootstrap_returns(returns, periods)
        total_returns = (1 + simulations).prod(axis=1) - 1

        lower = (1 - self.confidence) / 2
        upper = 1 - lower

        return {
            "expected": total_returns.mean(),
            "lower_bound": np.percentile(total_returns, lower * 100),
            "upper_bound": np.percentile(total_returns, upper * 100),
            "std": total_returns.std()
        }
|
|
```
|
|
|
|
## Performance Metrics
|
|
|
|
```python
|
|
def calculate_metrics(
    returns: pd.Series,
    rf_rate: float = 0.02,
    ann_factor: int = 252
) -> Dict[str, float]:
    """Calculate comprehensive performance metrics.

    Args:
        returns: Per-period simple returns; NaN entries are ignored.
        rf_rate: Annual risk-free rate used by Sharpe and Sortino.
        ann_factor: Periods per year (252 assumes daily returns).

    Returns:
        Metric name to value. ``num_trades`` counts periods with a non-zero
        return, since individual trades cannot be recovered from a return
        series. All metrics are zero for an empty input.
    """
    # NaN rows (e.g. the first pct_change row) are not observations.
    returns = returns.dropna()
    n_obs = len(returns)
    if n_obs == 0:
        return {
            "total_return": 0.0,
            "annual_return": 0.0,
            "annual_volatility": 0.0,
            "sharpe_ratio": 0.0,
            "sortino_ratio": 0.0,
            "calmar_ratio": 0.0,
            "max_drawdown": 0.0,
            "win_rate": 0.0,
            "profit_factor": 0.0,
            "num_trades": 0,
        }

    # Basic metrics
    total_return = (1 + returns).prod() - 1
    annual_return = (1 + total_return) ** (ann_factor / n_obs) - 1
    annual_vol = returns.std() * np.sqrt(ann_factor)

    # Risk-adjusted returns
    excess_return = annual_return - rf_rate
    sharpe = excess_return / annual_vol if annual_vol > 0 else 0

    # Sortino: downside deviation is the root mean square of returns below
    # zero taken over ALL periods. The standard deviation of the negative
    # subset would be zero whenever the losses are equal in size.
    downside = returns.clip(upper=0)
    downside_vol = np.sqrt((downside ** 2).mean()) * np.sqrt(ann_factor)
    sortino = excess_return / downside_vol if downside_vol > 0 else 0

    # Calmar ratio
    equity = (1 + returns).cumprod()
    rolling_max = equity.cummax()
    drawdowns = (equity - rolling_max) / rolling_max
    max_drawdown = drawdowns.min()
    calmar = annual_return / abs(max_drawdown) if max_drawdown != 0 else 0

    # Win rate and profit factor
    wins = returns[returns > 0]
    losses = returns[returns < 0]
    n_active = int((returns != 0).sum())
    win_rate = len(wins) / n_active if n_active > 0 else 0
    gross_loss = abs(losses.sum())
    profit_factor = wins.sum() / gross_loss if gross_loss != 0 else np.inf

    return {
        "total_return": total_return,
        "annual_return": annual_return,
        "annual_volatility": annual_vol,
        "sharpe_ratio": sharpe,
        "sortino_ratio": sortino,
        "calmar_ratio": calmar,
        "max_drawdown": max_drawdown,
        "win_rate": win_rate,
        "profit_factor": profit_factor,
        "num_trades": n_active
    }
|
|
```
|
|
|
|
## Best Practices
|
|
|
|
### Do's
|
|
|
|
- **Use point-in-time data** - Avoid look-ahead bias
|
|
- **Include transaction costs** - Realistic estimates
|
|
- **Test out-of-sample** - Always reserve data
|
|
- **Use walk-forward** - Not just train/test
|
|
- **Monte Carlo analysis** - Understand uncertainty
|
|
|
|
### Don'ts
|
|
|
|
- **Don't overfit** - Limit parameters
|
|
- **Don't ignore survivorship** - Include delisted
|
|
- **Don't use adjusted data carelessly** - Understand adjustments
|
|
- **Don't optimize on full history** - Reserve test set
|
|
- **Don't ignore capacity** - Market impact matters
|
|
|
|
## Resources
|
|
|
|
- [Advances in Financial Machine Learning (Marcos López de Prado)](https://www.amazon.com/Advances-Financial-Machine-Learning-Marcos/dp/1119482089)
|
|
- [Quantitative Trading (Ernest Chan)](https://www.amazon.com/Quantitative-Trading-Build-Algorithmic-Business/dp/1119800064)
|
|
- [Backtrader Documentation](https://www.backtrader.com/docu/)
|