Skip to content

Commit 189dc56

Browse files
authored
Add script + configs (#157)
1 parent 2dd65a4 commit 189dc56

File tree

6 files changed

+1393
-0
lines changed

6 files changed

+1393
-0
lines changed

benchmarks/configs/config_basic.yaml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Basic benchmark configuration for flex attention
2+
# Usage: python flex_perf.py --config config_basic.yaml
3+
4+
# Core parameters
5+
dynamic: false
6+
calculate_bwd: false
7+
dtype: "bfloat16"
8+
9+
# Shape parameters
10+
b: [2, 8, 16] # batch sizes
11+
nh: ["16,16", "16,4"] # [query_heads,key_value_heads]
12+
s: [1024, 4096] # sequence lengths
13+
d: [64, 128] # head dimensions
14+
15+
# Attention types to benchmark
16+
mods: ["noop", "causal", "alibi", "sliding_window"]
17+
18+
# Backend and optimization
19+
backend: []
20+
max_autotune: false
21+
22+
# Decoding and cache settings
23+
decoding: false
24+
kv_size: null # null = no KV cache size given; the batch sizes in `b` are used instead
25+
26+
# Metrics and output
27+
throughput: true # Report memory bandwidth (TB/s) and compute throughput (TFLOPs)
28+
show_speedups: false # Show speedup calculations
29+
save_path: null # No CSV output
benchmarks/configs/config_comprehensive.yaml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Comprehensive benchmark configuration for flex attention
2+
# Usage: python flex_perf.py --config config_comprehensive.yaml
3+
4+
# Core parameters
5+
dynamic: false
6+
calculate_bwd: true # Include backward pass timing
7+
dtype: "bfloat16"
8+
9+
# Shape parameters - larger sweep
10+
b: [1, 2, 4, 8, 16, 32] # batch sizes
11+
nh: ["16,16", "16,2", "32,32", "32,4"] # [query_heads,key_value_heads]
12+
s: [512, 1024, 2048, 4096, 8192] # sequence lengths
13+
d: [64, 128, 256] # head dimensions
14+
15+
# All attention types
16+
mods: ["noop", "causal", "rel", "head_bias", "alibi", "sliding_window", "prefix_lm", "softcap"]
17+
18+
# Multiple backends for comparison
19+
backend: ["efficient", "math", "fav2"]
20+
max_autotune: true
21+
22+
# Decoding and cache settings
23+
decoding: false
24+
kv_size: null
25+
26+
# Metrics and output
27+
throughput: true # Report memory bandwidth (TB/s) and compute throughput (TFLOPs)
28+
show_speedups: true # Show speedup calculations
29+
save_path: "comprehensive_results.csv" # Save to CSV
benchmarks/configs/config_decoding.yaml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Decoding benchmark configuration for flex attention
2+
# Usage: python flex_perf.py --config config_decoding.yaml
3+
4+
# Core parameters
5+
dynamic: false
6+
calculate_bwd: false # Decoding doesn't support backward
7+
dtype: "bfloat16"
8+
9+
# Shape parameters for decoding (query length = 1)
10+
b: [1, 4, 8, 16] # batch sizes
11+
nh: ["16,16", "16,2", "32,32"] # [query_heads,key_value_heads]
12+
s: [1024, 2048, 4096, 8192] # KV sequence lengths
13+
d: [64, 128] # head dimensions
14+
15+
# Attention types suitable for decoding
16+
mods: ["causal", "alibi", "sliding_window", "softcap"]
17+
18+
# Backends including decoding-optimized ones
19+
backend: ["efficient", "fav2", "fakv"]
20+
max_autotune: false
21+
22+
# Decoding and cache settings
23+
decoding: true # Enable decoding mode
24+
kv_size: null
25+
26+
# Metrics and output
27+
throughput: true # Report memory bandwidth (TB/s) and compute throughput (TFLOPs)
28+
show_speedups: false # Focus on raw performance metrics
29+
save_path: "decoding_results.csv"
benchmarks/configs/config_memory_bound.yaml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Memory-bound benchmark configuration for flex attention
2+
# Usage: python flex_perf.py --config config_memory_bound.yaml
3+
4+
# Core parameters
5+
dynamic: false
6+
calculate_bwd: false
7+
dtype: "bfloat16"
8+
9+
# Shape parameters - focus on memory efficiency
10+
b: [1, 2, 4] # smaller batch sizes (NOTE: presumably superseded by kv_size below when it is set — confirm against the script)
11+
nh: ["16,16", "32,32"] # [query_heads,key_value_heads]
12+
s: [4096, 8192, 16384] # longer sequences
13+
d: [128, 256] # larger head dimensions
14+
15+
# Attention types that benefit from memory optimization
16+
mods: ["causal", "sliding_window", "document_mask"]
17+
18+
# Efficient backends
19+
backend: ["efficient", "fav2"]
20+
max_autotune: true
21+
22+
# Use KV cache size instead of batch size
23+
decoding: false
24+
kv_size: [256, 512, 1024] # KV cache size in MiB
25+
26+
# Metrics and output
27+
throughput: true # Report memory bandwidth (TB/s) and compute throughput (TFLOPs)
28+
show_speedups: true
29+
save_path: "memory_bound_results.csv"

0 commit comments

Comments
 (0)