From 19f455851d75f73864ce1ee60571cd4df75a93b5 Mon Sep 17 00:00:00 2001
From: markosg04 <mgeorghiades@a16z.com>
Date: Fri, 1 Aug 2025 13:58:37 -0400
Subject: [PATCH 1/4] working

---
 jolt-optimizations/src/batch_addition.rs | 145 +++++++++++++++++++++++
 jolt-optimizations/src/lib.rs            |   3 +
 2 files changed, 148 insertions(+)
 create mode 100644 jolt-optimizations/src/batch_addition.rs
diff --git a/jolt-optimizations/src/batch_addition.rs b/jolt-optimizations/src/batch_addition.rs
new file mode 100644
index 000000000..bd705e14f
--- /dev/null
+++ b/jolt-optimizations/src/batch_addition.rs
@@ -0,0 +1,145 @@
+//! Batch affine point addition for G1
+//!
+//! Implements efficient batch addition of affine elliptic curve points
+//! using Montgomery's batch inversion trick to minimize field inversions.
+
+use ark_bn254::G1Affine;
+use ark_ec::AffineRepr;
+use ark_ff::Zero;
+use ark_ec::CurveGroup;
+
+/// Performs batch addition of G1 affine points.
+///
+/// Given a slice of base points and indices, computes the sum of all points
+/// at the specified indices: bases[indices[0]] + bases[indices[1]] + ... + bases[indices[n-1]]
+///
+/// Uses batch inversion to compute all divisions efficiently:
+/// - Standard addition: 1 inversion per addition
+/// - Batch addition: 1 inversion + 3n multiplications for n additions
+///
+/// # Arguments
+/// * `bases` - Slice of G1 affine points to select from
+/// * `indices` - Slice of indices specifying which points to sum
+///
+/// # Returns
+/// The sum of all selected points as a single G1Affine point
+pub fn batch_g1_additions(bases: &[G1Affine], indices: &[usize]) -> G1Affine {
+    if indices.is_empty() {
+        return G1Affine::zero();
+    }
+    
+    if indices.len() == 1 {
+        return bases[indices[0]];
+    }
+    
+    // Collect all points to be added
+    let mut points_to_add: Vec<G1Affine> = indices.iter().map(|&i| bases[i]).collect();
+    
+    // Iteratively reduce pairs until we have a single result
+    while points_to_add.len() > 1 {
+        let mut next_round = Vec::new();
+        let mut denominators = Vec::new();
+        let mut pairs = Vec::new();
+        
+        // Process points in pairs
+        let mut i = 0;
+        while i < points_to_add.len() {
+            if i + 1 < points_to_add.len() {
+                let p1 = points_to_add[i];
+                let p2 = points_to_add[i + 1];
+                
+                // Handle special cases
+                if p1.is_zero() {
+                    next_round.push(p2);
+                } else if p2.is_zero() {
+                    next_round.push(p1);
+                } else if p1.x == p2.x {
+                    if p1.y == p2.y {
+                        // Same point - would need doubling formula
+                        // For now, just push p1 (in practice, implement doubling)
+                        next_round.push(p1);
+                    } else {
+                        // Inverse points - result is infinity
+                        next_round.push(G1Affine::zero());
+                    }
+                } else {
+                    // Normal case - store for batch processing
+                    denominators.push(p2.x - p1.x);
+                    pairs.push((p1, p2));
+                }
+                i += 2;
+            } else {
+                // Odd number of points - carry the last one forward
+                next_round.push(points_to_add[i]);
+                i += 1;
+            }
+        }
+        
+        // Batch invert all denominators
+        if !denominators.is_empty() {
+            let mut inverses = denominators;
+            ark_ff::fields::batch_inversion(&mut inverses);
+            
+            // Apply all additions
+            for ((p1, p2), inv) in pairs.iter().zip(inverses.iter()) {
+                let lambda = (p2.y - p1.y) * inv;
+                let x3 = lambda * lambda - p1.x - p2.x;
+                let y3 = lambda * (p1.x - x3) - p1.y;
+                next_round.push(G1Affine::new(x3, y3));
+            }
+        }
+        
+        points_to_add = next_round;
+    }
+    
+    points_to_add[0]
+}
+
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use ark_std::UniformRand;
+    
+    #[test]
+    fn test_batch_addition_correctness() {
+        let mut rng = ark_std::test_rng();
+        
+        // Generate random points
+        let bases: Vec<G1Affine> = (0..10)
+            .map(|_| G1Affine::rand(&mut rng))
+            .collect();
+        
+        // Create indices to sum
+        let indices = vec![2, 3, 4, 5, 6, 7];
+        
+        // Compute batch addition
+        let batch_result = batch_g1_additions(&bases, &indices);
+        
+        // Verify against sequential addition
+        let mut expected = G1Affine::zero();
+        for &idx in &indices {
+            expected = (expected + bases[idx]).into_affine();
+        }
+        
+        assert_eq!(batch_result, expected, "Batch addition mismatch");
+    }
+    
+    #[test]
+    fn test_empty_indices() {
+        let bases: Vec<G1Affine> = vec![G1Affine::generator(); 5];
+        let result = batch_g1_additions(&bases, &[]);
+        assert_eq!(result, G1Affine::zero());
+    }
+    
+    #[test]
+    fn test_single_index() {
+        let mut rng = ark_std::test_rng();
+        let bases: Vec<G1Affine> = (0..5)
+            .map(|_| G1Affine::rand(&mut rng))
+            .collect();
+        
+        let result = batch_g1_additions(&bases, &[2]);
+        assert_eq!(result, bases[2]);
+    }
+}
\ No newline at end of file
diff --git a/jolt-optimizations/src/lib.rs b/jolt-optimizations/src/lib.rs
index a27a3865a..503791f03 100644
--- a/jolt-optimizations/src/lib.rs
+++ b/jolt-optimizations/src/lib.rs
@@ -8,6 +8,7 @@
 //! Also provides BN254 G1 equivalents.
 //! Uses Strauss-shamir batched scalar multiplication to maximally take advantage of GLV.
 
+pub mod batch_addition;
 pub mod constants;
 pub mod decomp_2d;
 pub mod decomp_4d;
@@ -51,3 +52,5 @@ pub use dory_g2::{
     vector_add_scalar_mul_g2_online, vector_add_scalar_mul_g2_precomputed,
     vector_add_scalar_mul_g2_windowed2_signed, vector_scalar_mul_add_gamma_g2_online,
 };
+
+pub use batch_addition::batch_g1_additions;

From bbdf3d7c068c71d70149b34095d950defd366e96 Mon Sep 17 00:00:00 2001
From: markosg04 <mgeorghiades@a16z.com>
Date: Fri, 1 Aug 2025 14:18:51 -0400
Subject: [PATCH 2/4] optimize and bench

---
 jolt-optimizations/Cargo.toml                |   4 +
 jolt-optimizations/benches/batch_addition.rs |  56 +++++++++
 jolt-optimizations/src/batch_addition.rs     | 115 ++++++++++---------
 3 files changed, 123 insertions(+), 52 deletions(-)
 create mode 100644 jolt-optimizations/benches/batch_addition.rs

diff --git a/jolt-optimizations/Cargo.toml b/jolt-optimizations/Cargo.toml
index 4dd56c780..49aba6276 100644
--- a/jolt-optimizations/Cargo.toml
+++ b/jolt-optimizations/Cargo.toml
@@ -52,5 +52,9 @@ harness = false
 name = "vector_scalar_mul_add_gamma_g2"
 harness = false
 
+[[bench]]
+name = "batch_addition"
+harness = false
+
 [[example]]
 name = "memory_test"
\ No newline at end of file
diff --git a/jolt-optimizations/benches/batch_addition.rs b/jolt-optimizations/benches/batch_addition.rs
new file mode 100644
index 000000000..4bdfac90c
--- /dev/null
+++ b/jolt-optimizations/benches/batch_addition.rs
@@ -0,0 +1,56 @@
+use ark_bn254::G1Affine;
+use ark_ec::{AffineRepr, CurveGroup};
+use ark_std::UniformRand;
+use ark_std::rand::RngCore;
+use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
+use jolt_optimizations::batch_g1_additions;
+use rayon::prelude::*;
+
+fn naive_parallel_sum(bases: &[G1Affine], indices: &[usize]) -> G1Affine {
+    indices
+        .par_iter()
+        .map(|&idx| bases[idx])
+        .reduce(|| G1Affine::zero(), |acc, point| (acc + point).into_affine())
+}
+
+fn bench_batch_addition(c: &mut Criterion) {
+    let mut group = c.benchmark_group("batch_g1_addition");
+    let mut rng = ark_std::test_rng();
+    
+    // Test different sizes
+    for size in [1 << 20].iter() {
+        let bases: Vec<G1Affine> = (0..*size)
+            .map(|_| G1Affine::rand(&mut rng))
+            .collect();
+        
+        // Use half the points
+        let indices: Vec<usize> = (0..size / 2)
+            .map(|_| (rng.next_u64() as usize) % size)
+            .collect();
+        
+        group.bench_with_input(
+            BenchmarkId::new("batch_optimized", size),
+            size,
+            |b, _| {
+                b.iter(|| {
+                    black_box(batch_g1_additions(&bases, &indices))
+                });
+            },
+        );
+        
+        group.bench_with_input(
+            BenchmarkId::new("naive_parallel", size),
+            size,
+            |b, _| {
+                b.iter(|| {
+                    black_box(naive_parallel_sum(&bases, &indices))
+                });
+            },
+        );
+    }
+    
+    group.finish();
+}
+
+criterion_group!(benches, bench_batch_addition);
+criterion_main!(benches);
\ No newline at end of file
diff --git a/jolt-optimizations/src/batch_addition.rs b/jolt-optimizations/src/batch_addition.rs
index bd705e14f..5cd488b15 100644
--- a/jolt-optimizations/src/batch_addition.rs
+++ b/jolt-optimizations/src/batch_addition.rs
@@ -5,8 +5,7 @@
 
 use ark_bn254::G1Affine;
 use ark_ec::AffineRepr;
-use ark_ff::Zero;
-use ark_ec::CurveGroup;
+use rayon::prelude::*;
 
 /// Performs batch addition of G1 affine points.
 ///
@@ -32,67 +31,53 @@ pub fn batch_g1_additions(bases: &[G1Affine], indices: &[usize]) -> G1Affine {
         return bases[indices[0]];
     }
     
-    // Collect all points to be added
-    let mut points_to_add: Vec<G1Affine> = indices.iter().map(|&i| bases[i]).collect();
+    // Start with indices, convert to points only when needed
+    let mut points: Vec<G1Affine> = Vec::with_capacity(indices.len());
+    points.extend(indices.iter().map(|&i| bases[i]));
     
     // Iteratively reduce pairs until we have a single result
-    while points_to_add.len() > 1 {
-        let mut next_round = Vec::new();
-        let mut denominators = Vec::new();
-        let mut pairs = Vec::new();
-        
-        // Process points in pairs
-        let mut i = 0;
-        while i < points_to_add.len() {
-            if i + 1 < points_to_add.len() {
-                let p1 = points_to_add[i];
-                let p2 = points_to_add[i + 1];
-                
-                // Handle special cases
-                if p1.is_zero() {
-                    next_round.push(p2);
-                } else if p2.is_zero() {
-                    next_round.push(p1);
-                } else if p1.x == p2.x {
-                    if p1.y == p2.y {
-                        // Same point - would need doubling formula
-                        // For now, just push p1 (in practice, implement doubling)
-                        next_round.push(p1);
-                    } else {
-                        // Inverse points - result is infinity
-                        next_round.push(G1Affine::zero());
-                    }
-                } else {
-                    // Normal case - store for batch processing
-                    denominators.push(p2.x - p1.x);
-                    pairs.push((p1, p2));
-                }
-                i += 2;
-            } else {
-                // Odd number of points - carry the last one forward
-                next_round.push(points_to_add[i]);
-                i += 1;
-            }
-        }
+    while points.len() > 1 {
+        let current_len = points.len();
+        let pairs_count = current_len / 2;
+        let has_odd = current_len % 2 == 1;
+        
+        // Collect denominators in parallel
+        let denominators: Vec<_> = (0..pairs_count)
+            .into_par_iter()
+            .map(|i| {
+                let p1 = points[i * 2];
+                let p2 = points[i * 2 + 1];
+                p2.x - p1.x
+            })
+            .collect();
         
         // Batch invert all denominators
-        if !denominators.is_empty() {
-            let mut inverses = denominators;
-            ark_ff::fields::batch_inversion(&mut inverses);
-            
-            // Apply all additions
-            for ((p1, p2), inv) in pairs.iter().zip(inverses.iter()) {
+        let mut inverses = denominators;
+        ark_ff::fields::batch_inversion(&mut inverses);
+        
+        // Apply all additions in parallel
+        let mut new_points: Vec<G1Affine> = (0..pairs_count)
+            .into_par_iter()
+            .zip(inverses.par_iter())
+            .map(|(i, inv)| {
+                let p1 = points[i * 2];
+                let p2 = points[i * 2 + 1];
                 let lambda = (p2.y - p1.y) * inv;
                 let x3 = lambda * lambda - p1.x - p2.x;
                 let y3 = lambda * (p1.x - x3) - p1.y;
-                next_round.push(G1Affine::new(x3, y3));
-            }
+                G1Affine::new(x3, y3)
+            })
+            .collect();
+        
+        // Handle odd element
+        if has_odd {
+            new_points.push(points[current_len - 1]);
         }
         
-        points_to_add = next_round;
+        points = new_points;
     }
     
-    points_to_add[0]
+    points[0]
 }
 
 
@@ -100,6 +85,8 @@ pub fn batch_g1_additions(bases: &[G1Affine], indices: &[usize]) -> G1Affine {
 mod tests {
     use super::*;
     use ark_std::UniformRand;
+    use ark_ec::CurveGroup;
+    use ark_std::rand::RngCore;
     
     #[test]
     fn test_batch_addition_correctness() {
@@ -142,4 +129,28 @@ mod tests {
         let result = batch_g1_additions(&bases, &[2]);
         assert_eq!(result, bases[2]);
     }
+    
+    #[test] 
+    fn test_stress_test_correctness() {
+        let mut rng = ark_std::test_rng();
+        
+        // Large test case
+        let base_size = 10000;
+        let indices_size = 5000;
+        
+        let bases: Vec<G1Affine> = (0..base_size)
+            .map(|_| G1Affine::rand(&mut rng))
+            .collect();
+        
+        let indices: Vec<usize> = (0..indices_size)
+            .map(|_| (rng.next_u64() as usize) % base_size)
+            .collect();
+        
+        // Compute using batch addition
+        let batch_result = batch_g1_additions(&bases, &indices);
+        
+        // For very large tests, we'll just verify it doesn't panic
+        // and returns a valid point (not infinity unless expected)
+        assert!(!batch_result.is_zero() || indices.is_empty());
+    }
 }
\ No newline at end of file

From 0d6cf24ee5bec0642522a522e89a17e9440a498f Mon Sep 17 00:00:00 2001
From: markosg04 <mgeorghiades@a16z.com>
Date: Fri, 1 Aug 2025 14:27:51 -0400
Subject: [PATCH 3/4] cleanup

---
 jolt-optimizations/benches/batch_addition.rs | 50 +++++--------
 jolt-optimizations/src/batch_addition.rs     | 77 ++++++++------------
 2 files changed, 48 insertions(+), 79 deletions(-)

diff --git a/jolt-optimizations/benches/batch_addition.rs b/jolt-optimizations/benches/batch_addition.rs
index 4bdfac90c..597ac241f 100644
--- a/jolt-optimizations/benches/batch_addition.rs
+++ b/jolt-optimizations/benches/batch_addition.rs
@@ -1,56 +1,42 @@
 use ark_bn254::G1Affine;
 use ark_ec::{AffineRepr, CurveGroup};
-use ark_std::UniformRand;
 use ark_std::rand::RngCore;
+use ark_std::UniformRand;
 use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
 use jolt_optimizations::batch_g1_additions;
 use rayon::prelude::*;
 
 fn naive_parallel_sum(bases: &[G1Affine], indices: &[usize]) -> G1Affine {
-    indices
-        .par_iter()
-        .map(|&idx| bases[idx])
-        .reduce(|| G1Affine::zero(), |acc, point| (acc + point).into_affine())
+    indices.par_iter().map(|&idx| bases[idx]).reduce(
+        || G1Affine::zero(),
+        |acc, point| (acc + point).into_affine(),
+    )
 }
 
 fn bench_batch_addition(c: &mut Criterion) {
     let mut group = c.benchmark_group("batch_g1_addition");
     let mut rng = ark_std::test_rng();
-    
+
     // Test different sizes
     for size in [1 << 20].iter() {
-        let bases: Vec<G1Affine> = (0..*size)
-            .map(|_| G1Affine::rand(&mut rng))
-            .collect();
-        
+        let bases: Vec<G1Affine> = (0..*size).map(|_| G1Affine::rand(&mut rng)).collect();
+
         // Use half the points
         let indices: Vec<usize> = (0..size / 2)
             .map(|_| (rng.next_u64() as usize) % size)
             .collect();
-        
-        group.bench_with_input(
-            BenchmarkId::new("batch_optimized", size),
-            size,
-            |b, _| {
-                b.iter(|| {
-                    black_box(batch_g1_additions(&bases, &indices))
-                });
-            },
-        );
-        
-        group.bench_with_input(
-            BenchmarkId::new("naive_parallel", size),
-            size,
-            |b, _| {
-                b.iter(|| {
-                    black_box(naive_parallel_sum(&bases, &indices))
-                });
-            },
-        );
+
+        group.bench_with_input(BenchmarkId::new("batch_optimized", size), size, |b, _| {
+            b.iter(|| black_box(batch_g1_additions(&bases, &indices)));
+        });
+
+        group.bench_with_input(BenchmarkId::new("naive_parallel", size), size, |b, _| {
+            b.iter(|| black_box(naive_parallel_sum(&bases, &indices)));
+        });
     }
-    
+
     group.finish();
 }
 
 criterion_group!(benches, bench_batch_addition);
-criterion_main!(benches);
\ No newline at end of file
+criterion_main!(benches);
diff --git a/jolt-optimizations/src/batch_addition.rs b/jolt-optimizations/src/batch_addition.rs
index 5cd488b15..700b4d63f 100644
--- a/jolt-optimizations/src/batch_addition.rs
+++ b/jolt-optimizations/src/batch_addition.rs
@@ -26,21 +26,19 @@ pub fn batch_g1_additions(bases: &[G1Affine], indices: &[usize]) -> G1Affine {
     if indices.is_empty() {
         return G1Affine::zero();
     }
-    
+
     if indices.len() == 1 {
         return bases[indices[0]];
     }
-    
-    // Start with indices, convert to points only when needed
+
     let mut points: Vec<G1Affine> = Vec::with_capacity(indices.len());
     points.extend(indices.iter().map(|&i| bases[i]));
-    
-    // Iteratively reduce pairs until we have a single result
+
     while points.len() > 1 {
         let current_len = points.len();
         let pairs_count = current_len / 2;
         let has_odd = current_len % 2 == 1;
-        
+
         // Collect denominators in parallel
         let denominators: Vec<_> = (0..pairs_count)
             .into_par_iter()
@@ -50,11 +48,11 @@ pub fn batch_g1_additions(bases: &[G1Affine], indices: &[usize]) -> G1Affine {
                 p2.x - p1.x
             })
             .collect();
-        
+
         // Batch invert all denominators
         let mut inverses = denominators;
         ark_ff::fields::batch_inversion(&mut inverses);
-        
+
         // Apply all additions in parallel
         let mut new_points: Vec<G1Affine> = (0..pairs_count)
             .into_par_iter()
@@ -68,89 +66,74 @@ pub fn batch_g1_additions(bases: &[G1Affine], indices: &[usize]) -> G1Affine {
                 G1Affine::new(x3, y3)
             })
             .collect();
-        
+
         // Handle odd element
         if has_odd {
             new_points.push(points[current_len - 1]);
         }
-        
+
         points = new_points;
     }
-    
+
     points[0]
 }
 
-
 #[cfg(test)]
 mod tests {
     use super::*;
-    use ark_std::UniformRand;
     use ark_ec::CurveGroup;
     use ark_std::rand::RngCore;
-    
+    use ark_std::UniformRand;
+
     #[test]
     fn test_batch_addition_correctness() {
         let mut rng = ark_std::test_rng();
-        
-        // Generate random points
-        let bases: Vec<G1Affine> = (0..10)
-            .map(|_| G1Affine::rand(&mut rng))
-            .collect();
-        
-        // Create indices to sum
+
+        let bases: Vec<G1Affine> = (0..10).map(|_| G1Affine::rand(&mut rng)).collect();
+
         let indices = vec![2, 3, 4, 5, 6, 7];
-        
-        // Compute batch addition
+
         let batch_result = batch_g1_additions(&bases, &indices);
-        
-        // Verify against sequential addition
+
         let mut expected = G1Affine::zero();
         for &idx in &indices {
             expected = (expected + bases[idx]).into_affine();
         }
-        
+
         assert_eq!(batch_result, expected, "Batch addition mismatch");
     }
-    
+
     #[test]
     fn test_empty_indices() {
         let bases: Vec<G1Affine> = vec![G1Affine::generator(); 5];
         let result = batch_g1_additions(&bases, &[]);
         assert_eq!(result, G1Affine::zero());
     }
-    
+
     #[test]
     fn test_single_index() {
         let mut rng = ark_std::test_rng();
-        let bases: Vec<G1Affine> = (0..5)
-            .map(|_| G1Affine::rand(&mut rng))
-            .collect();
-        
+        let bases: Vec<G1Affine> = (0..5).map(|_| G1Affine::rand(&mut rng)).collect();
+
         let result = batch_g1_additions(&bases, &[2]);
         assert_eq!(result, bases[2]);
     }
-    
-    #[test] 
+
+    #[test]
     fn test_stress_test_correctness() {
         let mut rng = ark_std::test_rng();
-        
-        // Large test case
+
         let base_size = 10000;
         let indices_size = 5000;
-        
-        let bases: Vec<G1Affine> = (0..base_size)
-            .map(|_| G1Affine::rand(&mut rng))
-            .collect();
-        
+
+        let bases: Vec<G1Affine> = (0..base_size).map(|_| G1Affine::rand(&mut rng)).collect();
+
         let indices: Vec<usize> = (0..indices_size)
             .map(|_| (rng.next_u64() as usize) % base_size)
             .collect();
-        
-        // Compute using batch addition
+
         let batch_result = batch_g1_additions(&bases, &indices);
-        
-        // For very large tests, we'll just verify it doesn't panic
-        // and returns a valid point (not infinity unless expected)
+
         assert!(!batch_result.is_zero() || indices.is_empty());
     }
-}
\ No newline at end of file
+}

From ff8c9864700ff41625e6c8f0e74f68fa80e0e53b Mon Sep 17 00:00:00 2001
From: markosg04 <mgeorghiades@a16z.com>
Date: Fri, 1 Aug 2025 15:00:12 -0400
Subject: [PATCH 4/4] fixed test

---
 jolt-optimizations/src/batch_addition.rs | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/jolt-optimizations/src/batch_addition.rs b/jolt-optimizations/src/batch_addition.rs
index 700b4d63f..7b1808440 100644
--- a/jolt-optimizations/src/batch_addition.rs
+++ b/jolt-optimizations/src/batch_addition.rs
@@ -134,6 +134,15 @@ mod tests {
 
         let batch_result = batch_g1_additions(&bases, &indices);
 
-        assert!(!batch_result.is_zero() || indices.is_empty());
+        // Compute expected result using naive sequential addition
+        let mut expected = G1Affine::zero();
+        for &idx in &indices {
+            expected = (expected + bases[idx]).into_affine();
+        }
+
+        assert_eq!(
+            batch_result, expected,
+            "Stress test failed: batch result doesn't match expected sum"
+        );
     }
 }