[LifetimeSafety] Add script performance benchmarking

usx95 · usx95 · commit 0fbfd74d23b6 · 2025-07-07T15:13:00.000Z
diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp
@@ -151,7 +151,12 @@ class OriginManager {
 
   OriginID get(const ValueDecl &D) {
     auto It = DeclToOriginID.find(&D);
-    assert(It != DeclToOriginID.end());
+    // TODO: This should be an assert(It != ExprToOriginID.end()). The current
+    // implementation falls back to getOrCreate to avoid crashing on
+    // yet-unhandled pointer expressions, creating an empty origin for them.
+    if (It == DeclToOriginID.end())
+      return getOrCreate(D);
+
     return It->second;
   }
 
diff --git a/clang/test/Analysis/lifetime_safety/benchmark.py b/clang/test/Analysis/lifetime_safety/benchmark.py
@@ -0,0 +1,215 @@
+import sys
+import argparse
+import subprocess
+import tempfile
+import json
+import os
+from datetime import datetime
+import numpy as np
+from scipy.optimize import curve_fit
+from scipy.stats import t
+
+def generate_cpp_cycle_test(n: int) -> str:
+    """
+    Generates a C++ code snippet with a specified number of pointers in a cycle.
+    """
+    if n <= 0:
+        return "// Number of variables must be positive."
+
+    cpp_code = "struct MyObj { int id; ~MyObj() {} };\n\n"
+    cpp_code += f"void long_cycle_{n}(bool condition) {{\n"
+    for i in range(1, n + 1):
+        cpp_code += f"  MyObj v{i}{{1}};\n"
+    cpp_code += "\n"
+    for i in range(1, n + 1):
+        cpp_code += f"  MyObj* p{i} = &v{i};\n"
+
+    cpp_code += "\n  while (condition) {\n"
+    if n > 0:
+        cpp_code += f"    MyObj* temp = p1;\n"
+        for i in range(1, n):
+            cpp_code += f"    p{i} = p{i+1};\n"
+        cpp_code += f"    p{n} = temp;\n"
+    cpp_code += "  }\n}\n"
+    cpp_code += f"\nint main() {{ long_cycle_{n}(false); return 0; }}\n"
+    return cpp_code
+
+def generate_cpp_merge_test(n: int) -> str:
+    """
+    Generates a C++ code snippet with N independent conditional assignments.
+    """
+    if n <= 0:
+        return "// Number of variables must be positive."
+
+    cpp_code = "struct MyObj { int id; ~MyObj() {} };\n\n"
+    cpp_code += f"void conditional_merges_{n}(bool condition) {{\n"
+    decls = [f"v{i}" for i in range(1, n + 1)]
+    cpp_code += f"  MyObj {', '.join(decls)};\n"
+    ptr_decls = [f"*p{i} = nullptr" for i in range(1, n + 1)]
+    cpp_code += f"  MyObj {', '.join(ptr_decls)};\n\n"
+
+    for i in range(1, n + 1):
+        cpp_code += f"  if(condition) {{ p{i} = &v{i}; }}\n"
+
+    cpp_code += "}\n"
+    cpp_code += f"\nint main() {{ conditional_merges_{n}(false); return 0; }}\n"
+    return cpp_code
+
+def analyze_trace_file(trace_path: str) -> tuple[float, float]:
+    """
+    Parses the -ftime-trace JSON output to find durations.
+
+    Returns:
+        A tuple of (lifetime_analysis_duration_us, total_clang_duration_us).
+    """
+    lifetime_duration = 0.0
+    total_duration = 0.0
+    try:
+        with open(trace_path, 'r') as f:
+            trace_data = json.load(f)
+            for event in trace_data.get('traceEvents', []):
+                if event.get('name') == 'LifetimeAnalysis':
+                    lifetime_duration += float(event.get('dur', 0))
+                if event.get('name') == 'ExecuteCompiler':
+                    total_duration += float(event.get('dur', 0))
+
+    except (IOError, json.JSONDecodeError) as e:
+        print(f"Error reading or parsing trace file {trace_path}: {e}", file=sys.stderr)
+        return 0.0, 0.0
+    return lifetime_duration, total_duration
+
+def power_law(n, c, k):
+    """Represents the power law function: y = c * n^k"""
+    return c * np.power(n, k)
+
+def human_readable_time(ms: float) -> str:
+    """Converts milliseconds to a human-readable string (ms or s)."""
+    if ms >= 1000:
+        return f"{ms / 1000:.2f} s"
+    return f"{ms:.2f} ms"
+
+def generate_markdown_report(results: dict) -> str:
+    """Generates a Markdown-formatted report from the benchmark results."""
+    report = []
+    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S %Z")
+    report.append(f"# Lifetime Analysis Performance Report")
+    report.append(f"> Generated on: {timestamp}")
+    report.append("\n---\n")
+
+    for test_type, data in results.items():
+        title = 'Pointer Cycle in Loop' if test_type == 'cycle' else 'CFG Merges'
+        report.append(f"## Test Case: {title}")
+        report.append("")
+
+        # Table header
+        report.append("| N   | Analysis Time | Total Clang Time |")
+        report.append("|:----|--------------:|-----------------:|")
+
+        # Table rows
+        n_data = np.array(data['n'])
+        analysis_data = np.array(data['lifetime_ms'])
+        total_data = np.array(data['total_ms'])
+        for i in range(len(n_data)):
+            analysis_str = human_readable_time(analysis_data[i])
+            total_str = human_readable_time(total_data[i])
+            report.append(f"| {n_data[i]:<3} | {analysis_str:>13} | {total_str:>16} |")
+
+        report.append("")
+
+        # Complexity analysis
+        report.append(f"**Complexity Analysis:**")
+        try:
+            popt, pcov = curve_fit(power_law, n_data, analysis_data, p0=[0, 2], maxfev=5000)
+            _, k = popt
+            
+            # R-squared calculation
+            residuals = analysis_data - power_law(n_data, *popt)
+            ss_res = np.sum(residuals**2)
+            ss_tot = np.sum((analysis_data - np.mean(analysis_data))**2)
+            r_squared = 1 - (ss_res / ss_tot)
+            
+            # Confidence Interval for k
+            alpha = 0.05  # 95% confidence
+            dof = max(0, len(n_data) - len(popt)) # degrees of freedom
+            t_val = t.ppf(1.0 - alpha / 2., dof)
+            # Standard error of the parameters
+            perr = np.sqrt(np.diag(pcov))
+            k_stderr = perr[1]
+            k_ci_lower = k - t_val * k_stderr
+            k_ci_upper = k + t_val * k_stderr
+
+            report.append(f"- The performance of the analysis for this case scales approximately as **O(n<sup>{k:.2f}</sup>)**.")
+            report.append(f"- **Goodness of Fit (R²):** `{r_squared:.4f}` (closer to 1.0 is better).")
+            report.append(f"- **95% Confidence Interval for exponent 'k':** `[{k_ci_lower:.2f}, {k_ci_upper:.2f}]`.")
+
+        except RuntimeError:
+            report.append("- Could not determine a best-fit curve for the data.")
+
+        report.append("\n---\n")
+
+    return "\n".join(report)
+
+def run_single_test(clang_binary: str, test_type: str, n: int) -> tuple[float, float]:
+    """Generates, compiles, and benchmarks a single test case."""
+    print(f"--- Running Test: {test_type.capitalize()} with N={n} ---")
+    
+    generated_code = ""
+    if test_type == 'cycle':
+        generated_code = generate_cpp_cycle_test(n)
+    else: # merge
+        generated_code = generate_cpp_merge_test(n)
+
+    with tempfile.NamedTemporaryFile(mode='w+', suffix='.cpp', delete=False) as tmp_cpp:
+        tmp_cpp.write(generated_code)
+        source_file = tmp_cpp.name
+    
+    trace_file = os.path.splitext(source_file)[0] + '.json'
+
+    clang_command = [
+        clang_binary, '-c', '-o', '/dev/null', '-ftime-trace=' + trace_file,
+        '-Wexperimental-lifetime-safety', '-std=c++17', source_file
+    ]
+
+    result = subprocess.run(clang_command, capture_output=True, text=True)
+    
+    if result.returncode != 0:
+        print(f"Compilation failed for N={n}!", file=sys.stderr)
+        print(result.stderr, file=sys.stderr)
+        os.remove(source_file)
+        return 0.0, 0.0
+    
+    lifetime_us, total_us = analyze_trace_file(trace_file)
+    os.remove(source_file)
+    os.remove(trace_file)
+    
+    return lifetime_us / 1000.0, total_us / 1000.0
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Generate, compile, and benchmark C++ test cases for Clang's lifetime analysis.")
+    parser.add_argument("--clang-binary", type=str, required=True, help="Path to the Clang executable.")
+    
+    args = parser.parse_args()
+
+    n_values = [10, 25, 50, 75, 100, 150, 200]
+    results = {
+        'cycle': {'n': [], 'lifetime_ms': [], 'total_ms': []},
+        'merge': {'n': [], 'lifetime_ms': [], 'total_ms': []}
+    }
+
+    print("Running performance benchmarks...")
+    for test_type in ['cycle', 'merge']:
+        for n in n_values:
+            lifetime_ms, total_ms = run_single_test(args.clang_binary, test_type, n)
+            if total_ms > 0:
+                results[test_type]['n'].append(n)
+                results[test_type]['lifetime_ms'].append(lifetime_ms)
+                results[test_type]['total_ms'].append(total_ms)
+                print(f"    Total: {human_readable_time(total_ms)} | Analysis: {human_readable_time(lifetime_ms)}")
+    
+    print("\n\n" + "="*80)
+    print("Generating Markdown Report...")
+    print("="*80 + "\n")
+    
+    markdown_report = generate_markdown_report(results)
+    print(markdown_report)
+