
Commit 434e857

Merge pull request #19 from MrZLeo/master
Improve benchmark support
2 parents d5037f8 + 3732583 commit 434e857

2 files changed: +131 −19 lines changed

Cargo.toml

Lines changed: 3 additions & 0 deletions
@@ -21,6 +21,9 @@ optional = true
 
 [dev-dependencies]
 criterion = "0.3"
+ctor = "0.1.23"
+rand = "0.8.5"
+rand_chacha = "0.3.1"
 
 [[bench]]
 name = "memory_allocator_benchmark"
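
The three new dev-dependencies each serve the reworked benchmarks: criterion drives the measurements, ctor runs heap setup before main(), and rand plus rand_chacha generate reproducible allocation sizes. For orientation, a minimal Criterion bench skeleton is sketched below; it is illustrative only (the function names are invented, not from this repository), and it assumes the [[bench]] target is registered with harness = false, which Criterion requires but which lies outside this diff's context.

use criterion::{black_box, criterion_group, criterion_main, Criterion};

// A trivial measurement; black_box keeps the value opaque to the optimizer
// so the computation is not folded away.
fn demo_bench(c: &mut Criterion) {
    c.bench_function("demo", |b| b.iter(|| black_box(2 + 2)));
}

criterion_group!(benches, demo_bench);
criterion_main!(benches);

A suite registered this way runs with cargo bench.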

benches/memory_allocator_benchmark.rs

Lines changed: 128 additions & 19 deletions
@@ -1,5 +1,7 @@
 #[macro_use]
 extern crate alloc;
+#[macro_use]
+extern crate ctor;
 
 use std::sync::Arc;
 use std::thread;
@@ -11,36 +13,57 @@ use alloc::alloc::Layout;
 use buddy_system_allocator::LockedHeap;
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
 
+const SMALL_SIZE: usize = 8;
+const LARGE_SIZE: usize = 1024 * 1024; // 1 MiB
+const ALIGN: usize = 8;
+
+/// Allocate a small object
 #[inline]
-pub fn large_alloc<const ORDER: usize>(heap: &LockedHeap<ORDER>) {
-    let layout = unsafe { Layout::from_size_align_unchecked(1024, 8) };
+pub fn small_alloc<const ORDER: usize>(heap: &LockedHeap<ORDER>) {
+    let layout = unsafe { Layout::from_size_align_unchecked(SMALL_SIZE, ALIGN) };
     unsafe {
         let addr = heap.alloc(layout);
         heap.dealloc(addr, layout);
     }
 }
 
+/// Allocate a large object
 #[inline]
-pub fn small_alloc<const ORDER: usize>(heap: &LockedHeap<ORDER>) {
-    let layout = unsafe { Layout::from_size_align_unchecked(8, 8) };
+pub fn large_alloc<const ORDER: usize>(heap: &LockedHeap<ORDER>) {
+    let layout = unsafe { Layout::from_size_align_unchecked(LARGE_SIZE, ALIGN) };
     unsafe {
         let addr = heap.alloc(layout);
         heap.dealloc(addr, layout);
     }
 }
 
+/// Multiple threads allocate objects of random sizes
 #[inline]
-pub fn mutil_thread_alloc<const ORDER: usize>(heap: &'static LockedHeap<ORDER>) {
-    let mut threads = vec![];
+pub fn mutil_thread_random_size<const ORDER: usize>(heap: &'static LockedHeap<ORDER>) {
+    const THREAD_SIZE: usize = 10;
+
+    use rand::prelude::*;
+    use rand::{Rng, SeedableRng};
+    use rand_chacha::ChaCha8Rng;
+
+    let mut threads = Vec::with_capacity(THREAD_SIZE);
     let alloc = Arc::new(heap);
-    for i in 0..10 {
-        let a = alloc.clone();
+    for i in 0..THREAD_SIZE {
+        let prethread_alloc = alloc.clone();
         let handle = thread::spawn(move || {
-            let layout = unsafe { Layout::from_size_align_unchecked(i * 10, 8) };
-            let addr;
-            unsafe { addr = a.alloc(layout) }
-            sleep(Duration::from_nanos(10 - i as u64));
-            unsafe { a.dealloc(addr, layout) }
+            // Seed the RNG with `i` so that every run of the benchmark
+            // generates the same sequence of sizes
+            let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(i as u64);
+            // Pick a random object size in the range [SMALL_SIZE ..= LARGE_SIZE]
+            let layout = unsafe {
+                Layout::from_size_align_unchecked(rng.gen_range(SMALL_SIZE..=LARGE_SIZE), ALIGN)
+            };
+            let addr = unsafe { prethread_alloc.alloc(layout) };
+
+            // Sleep for a while before deallocating
+            sleep(Duration::from_nanos((THREAD_SIZE - i) as u64));
+
+            unsafe { prethread_alloc.dealloc(addr, layout) }
         });
         threads.push(handle);
     }
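
The fixed per-thread seed in the hunk above is the main design choice: ChaCha8Rng seeded with the thread index reproduces the identical sequence of allocation sizes on every run, so Criterion's iterations stay comparable. A self-contained sketch of that property (the function name is illustrative, not from the commit):

use rand::{Rng, SeedableRng};
use rand_chacha::ChaCha8Rng;

// With a fixed seed the generated sequence is deterministic, so repeated
// benchmark runs allocate the same workload.
fn sizes_for_thread(seed: u64, n: usize) -> Vec<usize> {
    let mut rng = ChaCha8Rng::seed_from_u64(seed);
    (0..n).map(|_| rng.gen_range(8..=1024 * 1024)).collect()
}

fn main() {
    assert_eq!(sizes_for_thread(0, 4), sizes_for_thread(0, 4));
}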
@@ -51,32 +74,118 @@ pub fn mutil_thread_alloc<const ORDER: usize>(heap: &'static LockedHeap<ORDER>)
     }
 }
 
+/// Multithreaded benchmark inspired by the **Hoard** benchmark
+///
+/// Warning: this benchmark generally needs a long time to finish
+///
+/// ----------------------------------------------------------------------
+/// Hoard: A Fast, Scalable, and Memory-Efficient Allocator
+/// for Shared-Memory Multiprocessors
+/// Contact author: Emery Berger, http://www.cs.utexas.edu/users/emery
+///
+/// Copyright (c) 1998-2000, The University of Texas at Austin.
+///
+/// This library is free software; you can redistribute it and/or modify
+/// it under the terms of the GNU Library General Public License as
+/// published by the Free Software Foundation, http://www.fsf.org.
+///
+/// This library is distributed in the hope that it will be useful, but
+/// WITHOUT ANY WARRANTY; without even the implied warranty of
+/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+/// Library General Public License for more details.
+/// ----------------------------------------------------------------------
+#[inline]
+pub fn thread_test() {
+    const N_ITERATIONS: usize = 50;
+    const N_OBJECTS: usize = 30000;
+    const N_THREADS: usize = 10;
+    const OBJECT_SIZE: usize = 1;
+
+    #[derive(Clone)]
+    struct Foo {
+        pub a: i32,
+        pub b: i32,
+    }
+
+    let mut threads = Vec::with_capacity(N_THREADS);
+
+    for _i in 0..N_THREADS {
+        let handle = thread::spawn(move || {
+            // from the original C++: a = new Foo * [nobjects / nthreads];
+            let mut a = Vec::with_capacity(N_OBJECTS / N_THREADS);
+            for j in 0..N_ITERATIONS {
+                // inner objects, from the original C++:
+                // a[i] = new Foo[objSize];
+                for k in 0..(N_OBJECTS / N_THREADS) {
+                    a.push(vec![
+                        Foo {
+                            a: k as i32,
+                            b: j as i32,
+                        };
+                        OBJECT_SIZE
+                    ]);
+
+                    // Touch the allocation so the optimizer cannot simply
+                    // delete it
+                    // FIXME: don't know whether it works or not
+                    a[k][0].a += a[k][0].b;
+                }
+            }
+            // `a` is dropped (and deallocated) here
+        });
+        threads.push(handle);
+    }
+
+    for t in threads {
+        t.join().unwrap();
+    }
+}
+
 const ORDER: usize = 32;
-static HEAP_ALLOCATOR: LockedHeap<ORDER> = LockedHeap::<ORDER>::new();
-const KERNEL_HEAP_SIZE: usize = 16 * 1024 * 1024;
 const MACHINE_ALIGN: usize = core::mem::size_of::<usize>();
+/// For now 128 MiB is needed
+/// TODO: reduce memory use
+const KERNEL_HEAP_SIZE: usize = 128 * 1024 * 1024;
 const HEAP_BLOCK: usize = KERNEL_HEAP_SIZE / MACHINE_ALIGN;
 static mut HEAP: [usize; HEAP_BLOCK] = [0; HEAP_BLOCK];
 
-pub fn criterion_benchmark(c: &mut Criterion) {
-    // init heap
+/// Use `LockedHeap` as the global allocator
+#[global_allocator]
+static HEAP_ALLOCATOR: LockedHeap<ORDER> = LockedHeap::<ORDER>::new();
+
+/// Init heap
+///
+/// We need `ctor` here because the benchmark runs inside the std environment,
+/// meaning std performs some initialization before `fn main()` executes.
+/// However, our memory allocator must be initialized at runtime (it uses a
+/// linked list, which cannot be evaluated at compile time), and that
+/// initialization itself needs heap memory.
+///
+/// The way out of this dilemma is to run `fn init_heap()` during the
+/// initialization phase rather than in `fn main()`; `ctor` lets us do that.
+#[ctor]
+fn init_heap() {
     let heap_start = unsafe { HEAP.as_ptr() as usize };
     unsafe {
         HEAP_ALLOCATOR
             .lock()
             .init(heap_start, HEAP_BLOCK * MACHINE_ALIGN);
     }
+}
 
+/// Entry point of the benchmarks
+pub fn criterion_benchmark(c: &mut Criterion) {
     // run benchmark
     c.bench_function("small alloc", |b| {
         b.iter(|| small_alloc(black_box(&HEAP_ALLOCATOR)))
     });
     c.bench_function("large alloc", |b| {
         b.iter(|| large_alloc(black_box(&HEAP_ALLOCATOR)))
     });
-    c.bench_function("mutil thread alloc", |b| {
-        b.iter(|| mutil_thread_alloc(black_box(&HEAP_ALLOCATOR)))
+    c.bench_function("mutil thread random size", |b| {
+        b.iter(|| mutil_thread_random_size(black_box(&HEAP_ALLOCATOR)))
     });
+    c.bench_function("threadtest", |b| b.iter(|| thread_test()));
 }
 
 criterion_group!(benches, criterion_benchmark);
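
On the FIXME inside thread_test above: reading a[k][0] back may or may not stop the optimizer from eliding the allocations. A hedged alternative, given that criterion::black_box is already imported in this file, is to pass the data through black_box, an opaque identity function the optimizer must treat as a real use. A sketch with an invented function name, not part of the commit:

use criterion::black_box;

fn keep_allocation_alive() {
    let v = vec![0u8; 4096]; // the allocation under test
    // black_box is opaque to the optimizer, so the allocation and its
    // eventual drop both survive into the compiled benchmark.
    black_box(&v);
} // `v` is dropped here, so the matching dealloc is exercised as well

With the heap initialized by #[ctor] before Criterion's harness starts, the whole suite runs with cargo bench.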
