diff --git a/.gitignore b/.gitignore
index 60dad509..4239ec66 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,5 +25,6 @@ build/sail_cSim/
 tests/**/*.elf
 tests/arch-test-target/config.ini
 tests/arch-test-target/sail_cSim/riscv_sim_RV32
+tests/scimark2/
 __pycache__/
 src/rv32_jit.c
diff --git a/.gitmodules b/.gitmodules
index b9b99ffa..24acaf30 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -23,3 +23,11 @@
 	path = src/ieeelib
 	url = https://github.com/sysprog21/ieeelib
 	shallow = true
+[submodule "tests/doom"]
+	path = tests/doom
+	url = https://github.com/sysprog21/doom_riscv
+	shallow = true
+[submodule "tests/quake"]
+	path = tests/quake
+	url = https://github.com/sysprog21/quake-embedded
+	shallow = true
diff --git a/Makefile b/Makefile
index 3c21b459..f0c6ebe6 100644
--- a/Makefile
+++ b/Makefile
@@ -275,15 +275,15 @@ misalign: $(BIN) artifact
 
 # Non-trivial demonstration programs
 ifeq ($(call has, SDL), 1)
-doom_action := (cd $(OUT); ../$(BIN) doom.elf)
+doom_action := (cd $(OUT); ../$(BIN) riscv32/doom)
 doom_deps += $(DOOM_DATA) $(BIN)
-doom: $(doom_deps)
+doom: artifact $(doom_deps)
 	$(doom_action)
 
 ifeq ($(call has, EXT_F), 1)
-quake_action := (cd $(OUT); ../$(BIN) quake.elf)
+quake_action := (cd $(OUT); ../$(BIN) riscv32/quake)
 quake_deps += $(QUAKE_DATA) $(BIN)
-quake: $(quake_deps)
+quake: artifact $(quake_deps)
 	$(quake_action)
 endif
 endif
diff --git a/build/doom.elf b/build/doom.elf
deleted file mode 100644
index d3c51ca0..00000000
Binary files a/build/doom.elf and /dev/null differ
diff --git a/build/quake.elf b/build/quake.elf
deleted file mode 100644
index 0a4971f1..00000000
Binary files a/build/quake.elf and /dev/null differ
diff --git a/build/scimark2.elf b/build/scimark2.elf
deleted file mode 100644
index 469e12f6..00000000
Binary files a/build/scimark2.elf and /dev/null differ
diff --git a/docs/prebuilt.md b/docs/prebuilt.md
index f06e28ad..9ca3a480 100644
--- a/docs/prebuilt.md
+++ b/docs/prebuilt.md
@@ -36,6 +36,7 @@ The prebuilt binaries in `rv32emu-prebuilt` are built from the following reposit
     - sha512
 - `captcha` : See [tests/captcha.c](/tests/captcha.c)
 - `donut` : See [tests/donut.c](/tests/donut.c)
+- `doom` : See [sysprog21/doom_riscv](https://github.com/sysprog21/doom_riscv)
 - `fcalc` : See [tests/fcalc.c](/tests/fcalc.c)
 - `hamilton` : See [tests/hamilton.c](/tests/hamilton.c)
 - `jit` : See [tests/jit.c](/tests/jit.c)
@@ -53,15 +54,16 @@ The prebuilt binaries in `rv32emu-prebuilt` are built from the following reposit
 - `spirograph` : See [tests/spirograph.c](/tests/spirograph.c)
 - `uaes` : See [tests/uaes.c](/tests/uaes.c)
 
+To measure floating-point performance, the following RISC-V binaries are built with the option `-march=rv32imf`:
+- `quake` : See [sysprog21/quake-embedded](https://github.com/sysprog21/quake-embedded)
+- `scimark2` : See [Scimark 2.0](https://math.nist.gov/scimark2)
+
 There are still some prebuilt standalone RISC-V binaries under `build/` directory only for testing purpose:
 
 - `hello.elf` : See [tests/asm-hello](/tests/asm-hello)
 - `cc.elf` : See [tests/cc](/tests/cc)
 - `chacha20.elf` : See [tests/chacha20](/tests/chacha20)
-- `doom.elf` : See [sysprog21/doom_riscv](https://github.com/sysprog21/doom_riscv) [RV32M]
 - `ieee754.elf` : See [tests/ieee754.c](/tests/ieee754.c) [RV32F]
 - `jit-bf.elf` : See [ezaki-k/xkon_beta](https://github.com/ezaki-k/xkon_beta)
-- `quake.elf` : See [sysprog21/quake-embedded](https://github.com/sysprog21/quake-embedded) [RV32F]
 - `readelf.elf` : See [tests/readelf](/tests/readelf)
-- `scimark2.elf` : See [tests/scimark2](/tests/scimark2) [RV32MF]
 - `smolnes.elf` : See [tests/smolnes](/tests/smolnes.c) [RV32M]
diff --git a/mk/artifact.mk b/mk/artifact.mk
index 543955af..72450394 100644
--- a/mk/artifact.mk
+++ b/mk/artifact.mk
@@ -31,6 +31,9 @@ TEST_BENCHES += \
 	spirograph \
 	uaes
 
+SCIMARK2_URL := https://math.nist.gov/scimark2/scimark2_1c.zip
+SCIMARK2_SHA1 := de278c5b8cef84ab6dda41855052c7bfef919e36
+
 SHELL_HACK := $(shell mkdir -p $(BIN_DIR)/linux-x86-softfp $(BIN_DIR)/riscv32)
 
 ifeq ($(call has, PREBUILT), 1)
@@ -38,33 +41,67 @@ ifeq ($(call has, PREBUILT), 1)
 else
   # Since rv32emu only supports the dynamic binary translation of integer instruction in tiered compilation currently,
   # we disable the hardware floating-point and the related SIMD operation of x86.
-  CFLAGS := -m32 -mno-sse -mno-sse2 -msoft-float -O2 -L$(BIN_DIR)
+  CFLAGS := -m32 -mno-sse -mno-sse2 -msoft-float -O2 -Wno-unused-result -L$(BIN_DIR)
   LDFLAGS := -lsoft-fp -lm
 
-  CFLAGS_CROSS := -march=rv32im -mabi=ilp32 -O2
+  CFLAGS_CROSS := -march=rv32im -mabi=ilp32 -O2 -Wno-implicit-function-declaration
   LDFLAGS_CROSS := -lm -lsemihost
 endif
 
-.PHONY: artifact
+.PHONY: artifact scimark2 ieeelib
 
-artifact:
+artifact: ieeelib scimark2
 ifeq ($(call has, PREBUILT), 1)
 	$(Q)$(PRINTF) "Fetching prebuilt executables from \"rv32emu-prebuilt\" ...\n"
 	$(Q)wget -q --show-progress https://github.com/sysprog21/rv32emu-prebuilt/releases/download/$(LATEST_RELEASE)/rv32emu-prebuilt.tar.gz -O- | tar -C build --strip-components=1 -xz
 else
-	git submodule update --init ./src/ieeelib $(addprefix ./tests/,$(foreach tb,$(TEST_SUITES),$(tb)))
-	$(Q)$(MAKE) -C ./src/ieeelib CC=$(CC) CFLAGS="$(CFLAGS)" BINDIR=$(BIN_DIR)
+	git submodule update --init $(addprefix ./tests/,$(foreach tb,$(TEST_SUITES),$(tb)))
 	$(Q)for tb in $(TEST_SUITES); do \
 	    CC=$(CC) CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS)" BINDIR=$(BIN_DIR)/linux-x86-softfp $(MAKE) -C ./tests/$$tb; \
 	done
 	$(Q)for tb in $(TEST_SUITES); do \
 	    CC=$(CROSS_COMPILE)gcc CFLAGS="$(CFLAGS_CROSS)" LDFLAGS="$(LDFLAGS_CROSS)" BINDIR=$(BIN_DIR)/riscv32 $(MAKE) -C ./tests/$$tb; \
 	done
+
 	$(Q)$(PRINTF) "Building standalone testbenches ...\n"
 	$(Q)for tb in $(TEST_BENCHES); do \
-	    $(CC) $(CFLAGS) -Wno-unused-result -o $(BIN_DIR)/linux-x86-softfp/$$tb ./tests/$$tb.c $(LDFLAGS); \
+	    $(CC) $(CFLAGS) -o $(BIN_DIR)/linux-x86-softfp/$$tb ./tests/$$tb.c $(LDFLAGS); \
 	done
 	$(Q)for tb in $(TEST_BENCHES); do \
 	    $(CROSS_COMPILE)gcc $(CFLAGS_CROSS) -o $(BIN_DIR)/riscv32/$$tb ./tests/$$tb.c $(LDFLAGS_CROSS); \
 	done
+
+	git submodule update --init ./tests/doom ./tests/quake
+	$(Q)$(PRINTF) "Building doom ...\n"
+	$(Q)$(MAKE) -C ./tests/doom/src/riscv CROSS=$(CROSS_COMPILE)
+	$(Q)cp ./tests/doom/src/riscv/doom-riscv.elf $(BIN_DIR)/riscv32/doom
+	$(Q)$(PRINTF) "Building quake ...\n"
+# Configure out-of-tree with CMake, then build via $(MAKE) so -j/-n reach the sub-make.
+	$(Q)cd ./tests/quake && mkdir -p build && cd build && \
+	    cmake -DCMAKE_TOOLCHAIN_FILE=../port/boards/rv32emu/toolchain.cmake \
+	          -DCROSS_COMPILE=$(CROSS_COMPILE) -DCMAKE_BUILD_TYPE=RELEASE -DBOARD_NAME=rv32emu ..
+	$(Q)$(MAKE) -C ./tests/quake/build
+	$(Q)cp ./tests/quake/build/port/boards/rv32emu/quake $(BIN_DIR)/riscv32/quake
+endif
+
+scimark2:
+ifeq ($(call has, PREBUILT), 0)
+	$(Q)$(call prologue,"scimark2")
+	$(Q)$(call download,$(SCIMARK2_URL))
+	$(Q)$(call verify,$(SCIMARK2_SHA1),$(notdir $(SCIMARK2_URL)))
+	$(Q)$(call extract,"./tests/scimark2",$(notdir $(SCIMARK2_URL)))
+	$(Q)$(call epilogue,$(notdir $(SCIMARK2_URL)),$(SHA1_FILE1),$(SHA1_FILE2))
+	$(Q)$(PRINTF) "Building scimark2 ...\n"
+	$(Q)$(MAKE) -C ./tests/scimark2 clean && $(RM) ./tests/scimark2/scimark2.o
+	$(Q)$(MAKE) -C ./tests/scimark2 CC=$(CC) CFLAGS="-m32 -O2"
+	$(Q)cp ./tests/scimark2/scimark2 $(BIN_DIR)/linux-x86-softfp/scimark2
+	$(Q)$(MAKE) -C ./tests/scimark2 clean && $(RM) ./tests/scimark2/scimark2.o
+	$(Q)$(MAKE) -C ./tests/scimark2 CC=$(CROSS_COMPILE)gcc CFLAGS="-march=rv32imf -mabi=ilp32 -O2"
+	$(Q)cp ./tests/scimark2/scimark2 $(BIN_DIR)/riscv32/scimark2
+endif
+
+ieeelib:
+ifeq ($(call has, PREBUILT), 0)
+	git submodule update --init ./src/ieeelib
+	$(Q)$(MAKE) -C ./src/ieeelib CC=$(CC) CFLAGS="$(CFLAGS)" BINDIR=$(BIN_DIR)
 endif
diff --git a/mk/external.mk b/mk/external.mk
index 202c2589..a6a94676 100644
--- a/mk/external.mk
+++ b/mk/external.mk
@@ -25,14 +25,15 @@ define download
     $(eval _ := $(shell wget -q --show-progress --continue "$(strip $(1))"))
 endef
 
-# $(1): compressed source(.zip or.gz)
+# $(1): destination directory
+# $(2): compressed source (.zip or .gz)
 define extract
-    $(eval COMPRESSED_SUFFIX := $(suffix $(1)))
+    $(eval COMPRESSED_SUFFIX := $(suffix $(2)))
     $(eval COMPRESSED_IS_ZIP := $(filter $(COMPRESSED_SUFFIX),.zip))
     $(eval _ :=  \
         $(if $(COMPRESSED_IS_ZIP), \
-            ($(eval EXTRACTOR := unzip -d $(OUT) $(1))), \
-            ($(eval EXTRACTOR := tar -xf $(1) -C $(OUT))) \
+            ($(eval EXTRACTOR := unzip -d $(1) $(2))), \
+            ($(eval EXTRACTOR := tar -xf $(2) -C $(1))) \
     ))
     $(eval _ := $(shell $(EXTRACTOR)))
 endef
@@ -90,7 +91,7 @@ define download-extract-verify
 $($(T)_DATA):
 	$(Q)$$(call prologue,$$@)
 	$(Q)$$(call download,$(strip $($(T)_DATA_URL)))
-	$(Q)$$(call extract,$(notdir $($(T)_DATA_URL)))
+	$(Q)$$(call extract,$(OUT),$(notdir $($(T)_DATA_URL)))
 	$(Q)$$(call verify,$($(T)_DATA_SHA1), $($(T)_DATA))
 	$(Q)$$(call epilogue,$(notdir $($(T)_DATA_URL)),$(SHA1_FILE1),$(SHA1_FILE2))
 endef
diff --git a/tests/doom b/tests/doom
new file mode 160000
index 00000000..9b238b8e
--- /dev/null
+++ b/tests/doom
@@ -0,0 +1 @@
+Subproject commit 9b238b8ef747583e3968f23e0a28057446f79523
diff --git a/tests/quake b/tests/quake
new file mode 160000
index 00000000..da5c5ac7
--- /dev/null
+++ b/tests/quake
@@ -0,0 +1 @@
+Subproject commit da5c5ac793f039eb7e7ce349311cca0309acc10f
diff --git a/tests/scimark2/FFT.c b/tests/scimark2/FFT.c
deleted file mode 100644
index 33cc6481..00000000
--- a/tests/scimark2/FFT.c
+++ /dev/null
@@ -1,159 +0,0 @@
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "FFT.h"
-
-#ifndef M_PI
-#define M_PI 3.1415926535897932
-#endif
-
-/*-----------------------------------------------------------------------*/
-
-static int int_log2(int n);
-
-double FFT_num_flops(int N)
-{
-    double Nd = (double) N;
-    double logN = (double) int_log2(N);
-
-    return (5.0 * Nd - 2) * logN + 2 * (Nd + 1);
-}
-
-static int int_log2(int n)
-{
-    int log = 0;
-    for (int k = 1; k < n; k *= 2, log++)
-        ;
-    if (n != (1 << log)) {
-        printf("FFT: Data length is not a power of 2!: %d ", n);
-        exit(1);
-    }
-    return log;
-}
-
-static void FFT_transform_internal(int N, double *data, int direction)
-{
-    int n = N / 2;
-    int bit = 0;
-    int logn;
-    int dual = 1;
-
-    if (n == 1)
-        return; /* Identity operation! */
-    logn = int_log2(n);
-
-
-    if (N == 0)
-        return;
-
-    /* bit reverse the input data for decimation in time algorithm */
-    FFT_bitreverse(N, data);
-
-    /* apply fft recursion */
-    /* this loop executed int_log2(N) times */
-    for (bit = 0; bit < logn; bit++, dual *= 2) {
-        double w_real = 1.0;
-        double w_imag = 0.0;
-        int a, b;
-
-        double theta = 2.0 * direction * M_PI / (2.0 * (double) dual);
-        double s = sin(theta);
-        double t = sin(theta / 2.0);
-        double s2 = 2.0 * t * t;
-
-        for (a = 0, b = 0; b < n; b += 2 * dual) {
-            int i = 2 * b;
-            int j = 2 * (b + dual);
-
-            double wd_real = data[j];
-            double wd_imag = data[j + 1];
-
-            data[j] = data[i] - wd_real;
-            data[j + 1] = data[i + 1] - wd_imag;
-            data[i] += wd_real;
-            data[i + 1] += wd_imag;
-        }
-
-        /* a = 1 .. (dual-1) */
-        for (a = 1; a < dual; a++) {
-            /* trignometric recurrence for w-> exp(i theta) w */
-            {
-                double tmp_real = w_real - s * w_imag - s2 * w_real;
-                double tmp_imag = w_imag + s * w_real - s2 * w_imag;
-                w_real = tmp_real;
-                w_imag = tmp_imag;
-            }
-            for (b = 0; b < n; b += 2 * dual) {
-                int i = 2 * (b + a);
-                int j = 2 * (b + a + dual);
-
-                double z1_real = data[j];
-                double z1_imag = data[j + 1];
-
-                double wd_real = w_real * z1_real - w_imag * z1_imag;
-                double wd_imag = w_real * z1_imag + w_imag * z1_real;
-
-                data[j] = data[i] - wd_real;
-                data[j + 1] = data[i + 1] - wd_imag;
-                data[i] += wd_real;
-                data[i + 1] += wd_imag;
-            }
-        }
-    }
-}
-
-void FFT_bitreverse(int N, double *data)
-{
-    /* This is the Goldrader bit-reversal algorithm */
-    int n = N / 2;
-    int nm1 = n - 1;
-    int i = 0;
-    int j = 0;
-    for (; i < nm1; i++) {
-        /*int ii = 2*i; */
-        int ii = i << 1;
-
-        /*int jj = 2*j; */
-        int jj = j << 1;
-
-        /* int k = n / 2 ; */
-        int k = n >> 1;
-
-        if (i < j) {
-            double tmp_real = data[ii];
-            double tmp_imag = data[ii + 1];
-            data[ii] = data[jj];
-            data[ii + 1] = data[jj + 1];
-            data[jj] = tmp_real;
-            data[jj + 1] = tmp_imag;
-        }
-
-        while (k <= j) {
-            /*j = j - k ; */
-            j -= k;
-
-            /*k = k / 2 ;  */
-            k >>= 1;
-        }
-        j += k;
-    }
-}
-
-void FFT_transform(int N, double *data)
-{
-    FFT_transform_internal(N, data, -1);
-}
-
-void FFT_inverse(int N, double *data)
-{
-    int n = N / 2;
-    double norm = 0.0;
-    int i = 0;
-    FFT_transform_internal(N, data, +1);
-
-    /* Normalize */
-    norm = 1 / ((double) n);
-    for (i = 0; i < N; i++)
-        data[i] *= norm;
-}
diff --git a/tests/scimark2/FFT.h b/tests/scimark2/FFT.h
deleted file mode 100644
index 932ff75d..00000000
--- a/tests/scimark2/FFT.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#pragma once
-
-void FFT_transform(int N, double *data);
-void FFT_inverse(int N, double *data);
-void FFT_bitreverse(int N, double *data);
-double FFT_num_flops(int N);
diff --git a/tests/scimark2/LU.c b/tests/scimark2/LU.c
deleted file mode 100644
index f2ec57c9..00000000
--- a/tests/scimark2/LU.c
+++ /dev/null
@@ -1,70 +0,0 @@
-#include "LU.h"
-#include <math.h>
-
-double LU_num_flops(int N)
-{
-    /* rougly 2/3*N^3 */
-    double Nd = (double) N;
-
-    return (2.0 * Nd * Nd * Nd / 3.0);
-}
-
-int LU_factor(int M, int N, double **A, int *pivot)
-{
-    int minMN = M < N ? M : N;
-
-    for (int j = 0; j < minMN; j++) {
-        /* find pivot in column j and  test for singularity. */
-        int jp = j;
-
-        double t = fabs(A[j][j]);
-        for (int i = j + 1; i < M; i++) {
-            double ab = fabs(A[i][j]);
-            if (ab > t) {
-                jp = i;
-                t = ab;
-            }
-        }
-
-        pivot[j] = jp;
-
-        /* jp now has the index of maximum element  */
-        /* of column j, below the diagonal          */
-
-        if (A[jp][j] == 0)
-            return 1; /* factorization failed because of zero pivot */
-
-        if (jp != j) {
-            /* swap rows j and jp */
-            double *tA = A[j];
-            A[j] = A[jp];
-            A[jp] = tA;
-        }
-
-        if (j < M - 1) { /* compute elements j+1:M of jth column  */
-            /* note A(j,j), was A(jp,p) previously which was */
-            /* guarranteed not to be zero (Label #1)         */
-
-            double recp = 1.0 / A[j][j];
-            int k;
-            for (k = j + 1; k < M; k++)
-                A[k][j] *= recp;
-        }
-
-        if (j < minMN - 1) {
-            /* rank-1 update to trailing submatrix:   E = E - x*y; */
-            /* E is the region A(j+1:M, j+1:N) */
-            /* x is the column vector A(j+1:M,j) */
-            /* y is row vector A(j,j+1:N)        */
-            for (int ii = j + 1; ii < M; ii++) {
-                double *Aii = A[ii];
-                double *Aj = A[j];
-                double AiiJ = Aii[j];
-                for (int jj = j + 1; jj < N; jj++)
-                    Aii[jj] -= AiiJ * Aj[jj];
-            }
-        }
-    }
-
-    return 0;
-}
diff --git a/tests/scimark2/LU.h b/tests/scimark2/LU.h
deleted file mode 100644
index 5b5e7ff9..00000000
--- a/tests/scimark2/LU.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#pragma once
-
-double LU_num_flops(int N);
-int LU_factor(int M, int N, double **A, int *pivot);
diff --git a/tests/scimark2/Makefile b/tests/scimark2/Makefile
deleted file mode 100644
index 83787341..00000000
--- a/tests/scimark2/Makefile
+++ /dev/null
@@ -1,27 +0,0 @@
-.PHONY: clean
-
-include ../../mk/toolchain.mk
-
-CFLAGS = -march=rv32imf -mabi=ilp32
-CFLAGS += -Wall -Ofast -flto
-LDFLAGS = -lm -flto
-
-.SUFFIXES: .c .o
-
-%.o: %.c
-	$(CROSS_COMPILE)gcc $(CFLAGS) -c -o $@ $<
-
-BIN = scimark2.elf
-
-all: $(BIN)
-
-OBJS = \
-	FFT.o kernel.o Stopwatch.o Random.o SOR.o SparseCompRow.o \
-	array.o MonteCarlo.o LU.o \
-	scimark2.o
-
-$(BIN): $(OBJS)
-	$(CROSS_COMPILE)gcc $(CFLAGS) -o $@ $^ $(LDFLAGS)
-
-clean:
-	rm -f $(OBJS) $(BIN)
diff --git a/tests/scimark2/MonteCarlo.c b/tests/scimark2/MonteCarlo.c
deleted file mode 100644
index 34696e24..00000000
--- a/tests/scimark2/MonteCarlo.c
+++ /dev/null
@@ -1,57 +0,0 @@
-#include "Random.h"
-
-/**
- Estimate Pi by approximating the area of a circle.
-
- How: generate N random numbers in the unit square, (0,0) to (1,1)
- and see how are within a radius of 1 or less, i.e.
- <pre>
-
- sqrt(x^2 + y^2) < r
-
- </pre>
-  since the radius is 1.0, we can square both sides
-  and avoid a sqrt() computation:
-  <pre>
-
-    x^2 + y^2 <= 1.0
-
-  </pre>
-  this area under the curve is (Pi * r^2)/ 4.0,
-  and the area of the unit of square is 1.0,
-  so Pi can be approximated by
-  <pre>
-                # points with x^2+y^2 < 1
-     Pi =~      --------------------------  * 4.0
-                     total # points
-
-  </pre>
-
-*/
-
-static const int SEED = 113;
-
-double MonteCarlo_num_flops(int Num_samples)
-{
-    /* 3 flops in x^2+y^2 and 1 flop in random routine */
-    return ((double) Num_samples) * 4.0;
-}
-
-double MonteCarlo_integrate(int Num_samples)
-{
-    Random R = new_Random_seed(SEED);
-
-    int under_curve = 0;
-
-    for (int count = 0; count < Num_samples; count++) {
-        double x = Random_nextDouble(R);
-        double y = Random_nextDouble(R);
-
-        if (x * x + y * y <= 1.0)
-            under_curve++;
-    }
-
-    Random_delete(R);
-
-    return ((double) under_curve / Num_samples) * 4.0;
-}
diff --git a/tests/scimark2/MonteCarlo.h b/tests/scimark2/MonteCarlo.h
deleted file mode 100644
index ea3f4adf..00000000
--- a/tests/scimark2/MonteCarlo.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#pragma once
-
-double MonteCarlo_integrate(int Num_samples);
-double MonteCarlo_num_flops(int Num_samples);
diff --git a/tests/scimark2/README.md b/tests/scimark2/README.md
deleted file mode 100644
index d5316ad5..00000000
--- a/tests/scimark2/README.md
+++ /dev/null
@@ -1,44 +0,0 @@
-# SciMark2 (C version)
-
-This is an ANSI C version of the SciMark2 benchmark, translated from the
-original Java sources. The intent in making this benchmark available in C is
-for mainly for performance comparisons. For more information about SciMark, see http://math.nist.gov/scimark.
-
-Results of this benchmark can be sent to pozo@nist.gov.
-
-The program is split up into the main driver (`scimark2.c`) and kernel routines.
-A sample makefile is included;  however, one could simply write 
-
-```shell
-cc -o scimark2  -O *.c
-```
-
-and then run
-```shell
-./scimark2
-```
-
-This produces an output similar to
-```
-**                                                              **
-** SciMark2 Numeric Benchmark, see http://math.nist.gov/scimark **
-** for details. (Results can be submitted to pozo@nist.gov)     **
-**                                                              **
-Using       2.00 seconds min time per kenel.
-Composite Score:           65.56
-FFT             Mflops:    63.38    (N=1024)
-SOR             Mflops:   124.80    (100 x 100)
-MonteCarlo:     Mflops:    16.05
-Sparse matmult  Mflops:    59.15    (N=1000, nz=5000)
-LU              Mflops:    64.40    (M=100, N=100)
-0:29.62 Elapsed, 29.620 user sec, 0.010 sys sec, 100.0% utilization.
-```
-
-The first SciMark number reported is the composite score, followed by the an
-approximate Mflop rate for each kernel.
-
-To run the "large" version of this benchmark (with data structures
-that typically do not fit in cache) use
-```shell
-./scimark2 -large
-```
diff --git a/tests/scimark2/Random.c b/tests/scimark2/Random.c
deleted file mode 100644
index 932c4324..00000000
--- a/tests/scimark2/Random.c
+++ /dev/null
@@ -1,131 +0,0 @@
-#include <stdlib.h>
-
-#include "Random.h"
-
-/* static const int mdig = 32; */
-#define MDIG 32
-
-/* static const int one = 1; */
-#define ONE 1
-
-static const int m1 = (ONE << (MDIG - 2)) + ((ONE << (MDIG - 2)) - ONE);
-static const int m2 = ONE << MDIG / 2;
-
-/* For mdig = 32 : m1 =          2147483647, m2 =      65536
- *  For mdig = 64 : m1 = 9223372036854775807, m2 = 4294967296
- */
-
-/* move to initialize() because compiler could not resolve as a constant. */
-
-static /*const*/ double dm1; /*  = 1.0 / (double) m1; */
-
-/* private methods (defined below, but not in Random.h */
-
-static void initialize(Random R, int seed);
-
-Random new_Random_seed(int seed)
-{
-    Random R = (Random) malloc(sizeof(Random_struct));
-
-    initialize(R, seed);
-    R->left = 0.0;
-    R->right = 1.0;
-    R->width = 1.0;
-
-    return R;
-}
-
-void Random_delete(Random R)
-{
-    free(R);
-}
-
-/* Returns the next random number in the sequence.  */
-double Random_nextDouble(Random R)
-{
-    int k;
-
-    int I = R->i;
-    int J = R->j;
-    int *m = R->m;
-
-    k = m[I] - m[J];
-    if (k < 0)
-        k += m1;
-    R->m[J] = k;
-
-    if (I == 0)
-        I = 16;
-    else
-        I--;
-    R->i = I;
-
-    if (J == 0)
-        J = 16;
-    else
-        J--;
-    R->j = J;
-
-    return dm1 * (double) k;
-}
-
-/*--------------------------------------------------------------------
-                           PRIVATE METHODS
-  ----------------------------------------------------------------- */
-
-static void initialize(Random R, int seed)
-{
-    int jseed, k0, k1, j0, j1, iloop;
-
-    dm1 = 1.0 / (double) m1;
-
-    R->seed = seed;
-
-    if (seed < 0)
-        seed = -seed;                /* seed = abs(seed) */
-    jseed = (seed < m1 ? seed : m1); /* jseed = min(seed, m1) */
-    if (jseed % 2 == 0)
-        --jseed;
-    k0 = 9069 % m2;
-    k1 = 9069 / m2;
-    j0 = jseed % m2;
-    j1 = jseed / m2;
-    for (iloop = 0; iloop < 17; ++iloop) {
-        jseed = j0 * k0;
-        j1 = (jseed / m2 + j0 * k1 + j1 * k0) % (m2 / 2);
-        j0 = jseed % m2;
-        R->m[iloop] = j0 + m2 * j1;
-    }
-    R->i = 4;
-    R->j = 16;
-}
-
-double *RandomVector(int N, Random R)
-{
-    double *x = (double *) malloc(sizeof(double) * N);
-
-    for (int i = 0; i < N; i++)
-        x[i] = Random_nextDouble(R);
-
-    return x;
-}
-
-double **RandomMatrix(int M, int N, Random R)
-{
-    /* allocate matrix */
-    double **A = (double **) malloc(sizeof(double *) * M);
-
-    if (A == NULL)
-        return NULL;
-
-    for (int i = 0; i < M; i++) {
-        A[i] = (double *) malloc(sizeof(double) * N);
-        if (A[i] == NULL) {
-            free(A);
-            return NULL;
-        }
-        for (int j = 0; j < N; j++)
-            A[i][j] = Random_nextDouble(R);
-    }
-    return A;
-}
diff --git a/tests/scimark2/Random.h b/tests/scimark2/Random.h
deleted file mode 100644
index f70cbd12..00000000
--- a/tests/scimark2/Random.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#pragma once
-
-typedef struct {
-    int m[17];
-    int seed;
-    int i;        /* originally = 4 */
-    int j;        /* originally =  16 */
-    double left;  /*= 0.0; */
-    double right; /* = 1.0; */
-    double width; /* = 1.0; */
-} Random_struct, *Random;
-
-Random new_Random_seed(int seed);
-double Random_nextDouble(Random R);
-void Random_delete(Random R);
-double *RandomVector(int N, Random R);
-double **RandomMatrix(int M, int N, Random R);
diff --git a/tests/scimark2/SOR.c b/tests/scimark2/SOR.c
deleted file mode 100644
index fa2cfc42..00000000
--- a/tests/scimark2/SOR.c
+++ /dev/null
@@ -1,35 +0,0 @@
-#include "SOR.h"
-
-double SOR_num_flops(int M, int N, int num_iterations)
-{
-    double Md = (double) M;
-    double Nd = (double) N;
-    double num_iterD = (double) num_iterations;
-
-    return (Md - 1) * (Nd - 1) * num_iterD * 6.0;
-}
-
-void SOR_execute(int M, int N, double omega, double **G, int num_iterations)
-{
-    double omega_over_four = omega * 0.25;
-    double one_minus_omega = 1.0 - omega;
-
-    /* update interior points */
-    int Mm1 = M - 1;
-    int Nm1 = N - 1;
-    double *Gi;
-    double *Gim1;
-    double *Gip1;
-
-    for (int p = 0; p < num_iterations; p++) {
-        for (int i = 1; i < Mm1; i++) {
-            Gi = G[i];
-            Gim1 = G[i - 1];
-            Gip1 = G[i + 1];
-            for (int j = 1; j < Nm1; j++)
-                Gi[j] = omega_over_four *
-                            (Gim1[j] + Gip1[j] + Gi[j - 1] + Gi[j + 1]) +
-                        one_minus_omega * Gi[j];
-        }
-    }
-}
diff --git a/tests/scimark2/SOR.h b/tests/scimark2/SOR.h
deleted file mode 100644
index 7ca82756..00000000
--- a/tests/scimark2/SOR.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#pragma once
-
-double SOR_num_flops(int M, int N, int num_iterations);
-void SOR_execute(int M, int N, double omega, double **G, int num_iterations);
diff --git a/tests/scimark2/SparseCompRow.c b/tests/scimark2/SparseCompRow.c
deleted file mode 100644
index 0d2280a2..00000000
--- a/tests/scimark2/SparseCompRow.c
+++ /dev/null
@@ -1,37 +0,0 @@
-/* multiple iterations used to make kernel have roughly same granulairty as
- * other Scimark kernels.
- */
-double SparseCompRow_num_flops(int N, int nz, int num_iterations)
-{
-    /* Note that if nz does not divide N evenly, then the actual number of
-     * nonzeros used is adjusted slightly.
-     */
-    int actual_nz = (nz / N) * N;
-    return ((double) actual_nz) * 2.0 * ((double) num_iterations);
-}
-
-/* computes  a matrix-vector multiply with a sparse matrix held in compress-row
- * format.  If the size of the matrix in MxN with nz nonzeros, then the val[]
- * is the nz nonzeros, with its ith entry in column col[i]. The integer vector
- * row[] is of size M+1 and row[i] points to the begining of the ith row in
- * col[].
- */
-void SparseCompRow_matmult(int M,
-                           double *y,
-                           const double *val,
-                           const int *row,
-                           const int *col,
-                           const double *x,
-                           int NUM_ITERATIONS)
-{
-    for (int reps = 0; reps < NUM_ITERATIONS; reps++) {
-        for (int r = 0; r < M; r++) {
-            double sum = 0.0;
-            int rowR = row[r];
-            int rowRp1 = row[r + 1];
-            for (int i = rowR; i < rowRp1; i++)
-                sum += x[col[i]] * val[i];
-            y[r] = sum;
-        }
-    }
-}
diff --git a/tests/scimark2/SparseCompRow.h b/tests/scimark2/SparseCompRow.h
deleted file mode 100644
index c1efc067..00000000
--- a/tests/scimark2/SparseCompRow.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#pragma once
-
-double SparseCompRow_num_flops(int N, int nz, int num_iterations);
-
-void SparseCompRow_matmult(int M,
-                           double *y,
-                           double *val,
-                           int *row,
-                           int *col,
-                           double *x,
-                           int NUM_ITERATIONS);
diff --git a/tests/scimark2/Stopwatch.c b/tests/scimark2/Stopwatch.c
deleted file mode 100644
index 99cd0206..00000000
--- a/tests/scimark2/Stopwatch.c
+++ /dev/null
@@ -1,60 +0,0 @@
-#include <stdlib.h>
-
-#include "Stopwatch.h"
-
-static double seconds()
-{
-    return ((double) clock()) / (double) CLOCKS_PER_SEC;
-}
-
-void Stopwtach_reset(Stopwatch Q)
-{
-    Q->running = false;
-    Q->last_time = 0.0;
-    Q->total = 0.0;
-}
-
-Stopwatch new_Stopwatch(void)
-{
-    Stopwatch S = (Stopwatch) malloc(sizeof(struct Stopwatch));
-    if (S == NULL)
-        return NULL;
-
-    Stopwtach_reset(S);
-    return S;
-}
-
-void Stopwatch_delete(Stopwatch S)
-{
-    if (S != NULL)
-        free(S);
-}
-
-/* Start resets the timer to 0.0; use resume for continued total */
-
-void Stopwatch_start(Stopwatch Q)
-{
-    if (!(Q->running)) {
-        Q->running = true;
-        Q->total = 0.0;
-        Q->last_time = seconds();
-    }
-}
-
-void Stopwatch_stop(Stopwatch Q)
-{
-    if (Q->running) {
-        Q->total += seconds() - Q->last_time;
-        Q->running = false;
-    }
-}
-
-double Stopwatch_read(Stopwatch Q)
-{
-    if (Q->running) {
-        double t = seconds();
-        Q->total += t - Q->last_time;
-        Q->last_time = t;
-    }
-    return Q->total;
-}
diff --git a/tests/scimark2/Stopwatch.h b/tests/scimark2/Stopwatch.h
deleted file mode 100644
index 10852120..00000000
--- a/tests/scimark2/Stopwatch.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#pragma once
-
-#include <stdbool.h>
-#include <time.h>
-
-struct Stopwatch {
-    bool running;
-    double last_time;
-    double total;
-};
-typedef struct Stopwatch *Stopwatch;
-
-Stopwatch new_Stopwatch(void);
-void Stopwtach_reset(Stopwatch Q);
-void Stopwatch_delete(Stopwatch S);
-void Stopwatch_start(Stopwatch Q);
-void Stopwatch_stop(Stopwatch Q);
-double Stopwatch_read(Stopwatch Q);
diff --git a/tests/scimark2/array.c b/tests/scimark2/array.c
deleted file mode 100644
index 6dd60171..00000000
--- a/tests/scimark2/array.c
+++ /dev/null
@@ -1,63 +0,0 @@
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "array.h"
-
-double **new_Array2D_double(int M, int N)
-{
-    int i = 0;
-    bool failed = false;
-
-    double **A = (double **) malloc(sizeof(double *) * M);
-    if (A == NULL)
-        return NULL;
-
-    for (i = 0; i < M; i++) {
-        A[i] = (double *) malloc(N * sizeof(double));
-        if (A[i] == NULL) {
-            failed = true;
-            break;
-        }
-    }
-
-    /* if we didn't successfully allocate all rows of A      */
-    /* clean up any allocated memory (i.e. go back and free  */
-    /* previous rows) and return NULL                        */
-    if (failed) {
-        i--;
-        for (; i <= 0; i--)
-            free(A[i]);
-        free(A);
-        return NULL;
-    }
-    return A;
-}
-void Array2D_double_delete(int M, int N, double **A)
-{
-    if (A == NULL)
-        return;
-
-    for (int i = 0; i < M; i++)
-        free(A[i]);
-
-    free(A);
-}
-
-void Array2D_double_copy(int M, int N, double **B, double **A)
-{
-    int remainder = N & 3; /* N mod 4; */
-
-    for (int i = 0; i < M; i++) {
-        double *Bi = B[i];
-        double *Ai = A[i];
-        for (int j = 0; j < remainder; j++)
-            Bi[j] = Ai[j];
-        for (int j = remainder; j < N; j += 4) {
-            Bi[j] = Ai[j];
-            Bi[j + 1] = Ai[j + 1];
-            Bi[j + 2] = Ai[j + 2];
-            Bi[j + 3] = Ai[j + 3];
-        }
-    }
-}
diff --git a/tests/scimark2/array.h b/tests/scimark2/array.h
deleted file mode 100644
index d9f970d3..00000000
--- a/tests/scimark2/array.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#pragma once
-
-double **new_Array2D_double(int M, int N);
-void Array2D_double_delete(int M, int N, double **A);
-void Array2D_double_copy(int M, int N, double **B, double **A);
diff --git a/tests/scimark2/constants.h b/tests/scimark2/constants.h
deleted file mode 100644
index 9e65aeb0..00000000
--- a/tests/scimark2/constants.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#pragma once
-
-const double RESOLUTION_DEFAULT = 2.0; /* secs (normally 2.0) */
-const int RANDOM_SEED = 101010;
-
-/* default: small (cache-contained) problem sizes */
-const int FFT_SIZE = 1024; /* must be a power of two */
-const int SOR_SIZE = 100;  /* NxN grid */
-const int SPARSE_SIZE_M = 1000;
-const int SPARSE_SIZE_nz = 5000;
-const int LU_SIZE = 100;
-
-/* large (out-of-cache) problem sizes */
-const int LG_FFT_SIZE = 1048576; /* must be a power of two */
-const int LG_SOR_SIZE = 1000;    /*  NxN grid  */
-const int LG_SPARSE_SIZE_M = 100000;
-const int LG_SPARSE_SIZE_nz = 1000000;
-const int LG_LU_SIZE = 1000;
-
-/* tiny problem sizes (used to mainly to preload network classes     */
-/*                     for applet, so that network download times    */
-/*                     are factored out of benchmark.)               */
-/*                                                                   */
-const int TINY_FFT_SIZE = 16; /* must be a power of two */
-const int TINY_SOR_SIZE = 10; /* NxN grid */
-const int TINY_SPARSE_SIZE_M = 10;
-const int TINY_SPARSE_SIZE_N = 10;
-const int TINY_SPARSE_SIZE_nz = 50;
-const int TINY_LU_SIZE = 10;
diff --git a/tests/scimark2/kernel.c b/tests/scimark2/kernel.c
deleted file mode 100644
index 006c9ae4..00000000
--- a/tests/scimark2/kernel.c
+++ /dev/null
@@ -1,209 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "FFT.h"
-#include "LU.h"
-#include "MonteCarlo.h"
-#include "Random.h"
-#include "SOR.h"
-#include "SparseCompRow.h"
-#include "Stopwatch.h"
-#include "array.h"
-
-double kernel_measureFFT(int N, double mintime, Random R)
-{
-    /* initialize FFT data as complex (N real/img pairs) */
-    int twoN = 2 * N;
-    double *x = RandomVector(twoN, R);
-    long cycles = 1;
-    Stopwatch Q = new_Stopwatch();
-    double result = 0.0;
-
-    while (1) {
-        Stopwatch_start(Q);
-        for (int i = 0; i < cycles; i++) {
-            FFT_transform(twoN, x); /* forward transform */
-            FFT_inverse(twoN, x);   /* backward transform */
-        }
-        Stopwatch_stop(Q);
-        if (Stopwatch_read(Q) >= mintime)
-            break;
-
-        cycles *= 2;
-    }
-    /* approx Mflops */
-
-    result = FFT_num_flops(N) * cycles / Stopwatch_read(Q) * 1.0e-6;
-    Stopwatch_delete(Q);
-    free(x);
-    return result;
-}
-
-double kernel_measureSOR(int N, double min_time, Random R)
-{
-    double **G = RandomMatrix(N, N, R);
-    double result = 0.0;
-
-    Stopwatch Q = new_Stopwatch();
-    int cycles = 1;
-    while (1) {
-        Stopwatch_start(Q);
-        SOR_execute(N, N, 1.25, G, cycles);
-        Stopwatch_stop(Q);
-
-        if (Stopwatch_read(Q) >= min_time)
-            break;
-
-        cycles *= 2;
-    }
-    /* approx Mflops */
-
-    result = SOR_num_flops(N, N, cycles) / Stopwatch_read(Q) * 1.0e-6;
-    Stopwatch_delete(Q);
-    Array2D_double_delete(N, N, G);
-    return result;
-}
-
-double kernel_measureMonteCarlo(double min_time, Random R)
-{
-    double result = 0.0;
-    Stopwatch Q = new_Stopwatch();
-
-    int cycles = 1;
-    while (1) {
-        Stopwatch_start(Q);
-        MonteCarlo_integrate(cycles);
-        Stopwatch_stop(Q);
-        if (Stopwatch_read(Q) >= min_time)
-            break;
-
-        cycles *= 2;
-    }
-    /* approx Mflops */
-    result = MonteCarlo_num_flops(cycles) / Stopwatch_read(Q) * 1.0e-6;
-    Stopwatch_delete(Q);
-    return result;
-}
-
-
-double kernel_measureSparseMatMult(int N, int nz, double min_time, Random R)
-{
-    /* initialize vector multipliers and storage for result */
-    /* y = A*y;  */
-
-    double *x = RandomVector(N, R);
-    double *y = (double *) malloc(sizeof(double) * N);
-
-    double result = 0.0;
-
-    // initialize square sparse matrix
-    //
-    // for this test, we create a sparse matrix with M/nz nonzeros
-    // per row, with spaced-out evenly between the begining of the
-    // row to the main diagonal.  Thus, the resulting pattern looks
-    // like
-    //             +-----------------+
-    //             +*                +
-    //             +***              +
-    //             +* * *            +
-    //             +** *  *          +
-    //             +**  *   *        +
-    //             +* *   *   *      +
-    //             +*  *   *    *    +
-    //             +*   *    *    *  +
-    //             +-----------------+
-    //
-    // (as best reproducible with integer artihmetic)
-    // Note that the first nr rows will have elements past
-    // the diagonal.
-    int nr = nz / N;  /* average number of nonzeros per row  */
-    int anz = nr * N; /* _actual_ number of nonzeros         */
-
-    double *val = RandomVector(anz, R);
-    int *col = (int *) malloc(sizeof(int) * nz);
-    int *row = (int *) malloc(sizeof(int) * (N + 1));
-    int r = 0;
-    int cycles = 1;
-
-    Stopwatch Q = new_Stopwatch();
-
-    row[0] = 0;
-    for (r = 0; r < N; r++) {
-        /* initialize elements for row r */
-        int rowr = row[r];
-        int step = r / nr;
-        int i = 0;
-
-        row[r + 1] = rowr + nr;
-        if (step < 1)
-            step = 1; /* take at least unit steps */
-
-
-        for (i = 0; i < nr; i++)
-            col[rowr + i] = i * step;
-    }
-
-    while (1) {
-        Stopwatch_start(Q);
-        SparseCompRow_matmult(N, y, val, row, col, x, cycles);
-        Stopwatch_stop(Q);
-        if (Stopwatch_read(Q) >= min_time)
-            break;
-
-        cycles *= 2;
-    }
-
-    /* approx Mflops */
-    result =
-        SparseCompRow_num_flops(N, nz, cycles) / Stopwatch_read(Q) * 1.0e-6;
-
-    Stopwatch_delete(Q);
-    free(row);
-    free(col);
-    free(val);
-    free(y);
-    free(x);
-
-    return result;
-}
-
-double kernel_measureLU(int N, double min_time, Random R)
-{
-    double **A = NULL;
-    double **lu = NULL;
-    int *pivot = NULL;
-
-    Stopwatch Q = new_Stopwatch();
-    double result = 0.0;
-    int cycles = 1;
-
-    if ((A = RandomMatrix(N, N, R)) == NULL)
-        exit(1);
-    if ((lu = new_Array2D_double(N, N)) == NULL)
-        exit(1);
-    if ((pivot = (int *) malloc(N * sizeof(int))) == NULL)
-        exit(1);
-
-    while (1) {
-        Stopwatch_start(Q);
-        for (int i = 0; i < cycles; i++) {
-            Array2D_double_copy(N, N, lu, A);
-            LU_factor(N, N, lu, pivot);
-        }
-        Stopwatch_stop(Q);
-        if (Stopwatch_read(Q) >= min_time)
-            break;
-
-        cycles *= 2;
-    }
-
-    /* approx Mflops */
-    result = LU_num_flops(N) * cycles / Stopwatch_read(Q) * 1.0e-6;
-
-    Stopwatch_delete(Q);
-    free(pivot);
-    Array2D_double_delete(N, N, lu);
-    Array2D_double_delete(N, N, A);
-
-    return result;
-}
diff --git a/tests/scimark2/kernel.h b/tests/scimark2/kernel.h
deleted file mode 100644
index 66ff1198..00000000
--- a/tests/scimark2/kernel.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#pragma once
-
-double kernel_measureFFT(int FFT_size, double min_time, Random R);
-double kernel_measureSOR(int SOR_size, double min_time, Random R);
-double kernel_measureMonteCarlo(double min_time, Random R);
-double kernel_measureSparseMatMult(int Sparse_size_N,
-                                   int Sparse_size_nz,
-                                   double min_time,
-                                   Random R);
-double kernel_measureLU(int LU_size, double min_time, Random R);
diff --git a/tests/scimark2/scimark2.c b/tests/scimark2/scimark2.c
deleted file mode 100644
index 0e245e21..00000000
--- a/tests/scimark2/scimark2.c
+++ /dev/null
@@ -1,88 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "Random.h"
-#include "constants.h"
-#include "kernel.h"
-
-static void print_banner()
-{
-    printf(
-        "**                                                              **\n");
-    printf(
-        "** SciMark2 Numeric Benchmark, see http://math.nist.gov/scimark **\n");
-    printf(
-        "** for details. (Results can be submitted to pozo@nist.gov)     **\n");
-    printf(
-        "**                                                              **\n");
-}
-
-int main(int argc, char *argv[])
-{
-    /* default to the (small) cache-contained version */
-    double min_time = RESOLUTION_DEFAULT;
-
-    int FFT_size = FFT_SIZE;
-    int SOR_size = SOR_SIZE;
-    int Sparse_size_M = SPARSE_SIZE_M;
-    int Sparse_size_nz = SPARSE_SIZE_nz;
-    int LU_size = LU_SIZE;
-
-    /* run the benchmark */
-    double res[6] = {0.0};
-    Random R = new_Random_seed(RANDOM_SEED);
-
-    if (argc > 1) {
-        int current_arg = 1;
-
-        if (strcmp(argv[1], "-help") == 0 || strcmp(argv[1], "-h") == 0) {
-            fprintf(stderr, "Usage: [-large] [minimum_time]\n");
-            exit(0);
-        }
-
-        if (strcmp(argv[1], "-large") == 0) {
-            FFT_size = LG_FFT_SIZE;
-            SOR_size = LG_SOR_SIZE;
-            Sparse_size_M = LG_SPARSE_SIZE_M;
-            Sparse_size_nz = LG_SPARSE_SIZE_nz;
-            LU_size = LG_LU_SIZE;
-
-            current_arg++;
-        }
-
-        if (current_arg < argc) {
-            min_time = atof(argv[current_arg]);
-        }
-    }
-
-    print_banner();
-    printf("Using %10.2f seconds min time per kenel.\n", min_time);
-
-    res[1] = kernel_measureFFT(FFT_size, min_time, R);
-    res[2] = kernel_measureSOR(SOR_size, min_time, R);
-    res[3] = kernel_measureMonteCarlo(min_time, R);
-    res[4] =
-        kernel_measureSparseMatMult(Sparse_size_M, Sparse_size_nz, min_time, R);
-    res[5] = kernel_measureLU(LU_size, min_time, R);
-
-
-
-    res[0] = (res[1] + res[2] + res[3] + res[4] + res[5]) / 5;
-
-    /* print out results  */
-    printf("Composite Score:        %8.2f\n", res[0]);
-    printf("FFT             Mflops: %8.2f    (N=%d)\n", res[1], FFT_size);
-    printf("SOR             Mflops: %8.2f    (%d x %d)\n", res[2], SOR_size,
-           SOR_size);
-    printf("MonteCarlo:     Mflops: %8.2f\n", res[3]);
-    printf("Sparse matmult  Mflops: %8.2f    (N=%d, nz=%d)\n", res[4],
-           Sparse_size_M, Sparse_size_nz);
-    printf("LU              Mflops: %8.2f    (M=%d, N=%d)\n", res[5], LU_size,
-           LU_size);
-
-
-    Random_delete(R);
-
-    return 0;
-}