diff --git a/.gitignore b/.gitignore index 60dad509..4239ec66 100644 --- a/.gitignore +++ b/.gitignore @@ -25,5 +25,6 @@ build/sail_cSim/ tests/**/*.elf tests/arch-test-target/config.ini tests/arch-test-target/sail_cSim/riscv_sim_RV32 +tests/scimark2/ __pycache__/ src/rv32_jit.c diff --git a/.gitmodules b/.gitmodules index b9b99ffa..24acaf30 100644 --- a/.gitmodules +++ b/.gitmodules @@ -23,3 +23,11 @@ path = src/ieeelib url = https://github.com/sysprog21/ieeelib shallow = true +[submodule "tests/doom"] + path = tests/doom + url = https://github.com/sysprog21/doom_riscv + shallow = true +[submodule "tests/quake"] + path = tests/quake + url = https://github.com/sysprog21/quake-embedded + shallow = true diff --git a/Makefile b/Makefile index 3c21b459..f0c6ebe6 100644 --- a/Makefile +++ b/Makefile @@ -275,15 +275,15 @@ misalign: $(BIN) artifact # Non-trivial demonstration programs ifeq ($(call has, SDL), 1) -doom_action := (cd $(OUT); ../$(BIN) doom.elf) +doom_action := (cd $(OUT); ../$(BIN) riscv32/doom) doom_deps += $(DOOM_DATA) $(BIN) -doom: $(doom_deps) +doom: artifact $(doom_deps) $(doom_action) ifeq ($(call has, EXT_F), 1) -quake_action := (cd $(OUT); ../$(BIN) quake.elf) +quake_action := (cd $(OUT); ../$(BIN) riscv32/quake) quake_deps += $(QUAKE_DATA) $(BIN) -quake: $(quake_deps) +quake: artifact $(quake_deps) $(quake_action) endif endif diff --git a/build/doom.elf b/build/doom.elf deleted file mode 100644 index d3c51ca0..00000000 Binary files a/build/doom.elf and /dev/null differ diff --git a/build/quake.elf b/build/quake.elf deleted file mode 100644 index 0a4971f1..00000000 Binary files a/build/quake.elf and /dev/null differ diff --git a/build/scimark2.elf b/build/scimark2.elf deleted file mode 100644 index 469e12f6..00000000 Binary files a/build/scimark2.elf and /dev/null differ diff --git a/docs/prebuilt.md b/docs/prebuilt.md index f06e28ad..9ca3a480 100644 --- a/docs/prebuilt.md +++ b/docs/prebuilt.md @@ -36,6 +36,7 @@ The prebuilt binaries in 
`rv32emu-prebuilt` are built from the following reposit - sha512 - `captcha` : See [tests/captcha.c](/tests/captcha.c) - `donut` : See [tests/donut.c](/tests/donut.c) +- `doom` : See [sysprog21/doom_riscv](https://github.com/sysprog21/doom_riscv) - `fcalc` : See [tests/fcalc.c](/tests/fcalc.c) - `hamilton` : See [tests/hamilton.c](/tests/hamilton.c) - `jit` : See [tests/jit.c](/tests/jit.c) @@ -53,15 +54,16 @@ The prebuilt binaries in `rv32emu-prebuilt` are built from the following reposit - `spirograph` : See [tests/spirograph.c](/tests/spirograph.c) - `uaes` : See [tests/uaes.c](/tests/uaes.c) +To measure floating-point performance, the following RISC-V binaries are built with the option `-march=rv32imf`: +- `quake` : See [sysprog21/quake-embedded](https://github.com/sysprog21/quake-embedded) +- `scimark2` : See [SciMark 2.0](https://math.nist.gov/scimark2) + There are still some prebuilt standalone RISC-V binaries under `build/` directory only for testing purpose: - `hello.elf` : See [tests/asm-hello](/tests/asm-hello) - `cc.elf` : See [tests/cc](/tests/cc) - `chacha20.elf` : See [tests/chacha20](/tests/chacha20) -- `doom.elf` : See [sysprog21/doom_riscv](https://github.com/sysprog21/doom_riscv) [RV32M] - `ieee754.elf` : See [tests/ieee754.c](/tests/ieee754.c) [RV32F] - `jit-bf.elf` : See [ezaki-k/xkon_beta](https://github.com/ezaki-k/xkon_beta) -- `quake.elf` : See [sysprog21/quake-embedded](https://github.com/sysprog21/quake-embedded) [RV32F] - `readelf.elf` : See [tests/readelf](/tests/readelf) -- `scimark2.elf` : See [tests/scimark2](/tests/scimark2) [RV32MF] - `smolnes.elf` : See [tests/smolnes](/tests/smolnes.c) [RV32M] diff --git a/mk/artifact.mk b/mk/artifact.mk index 543955af..72450394 100644 --- a/mk/artifact.mk +++ b/mk/artifact.mk @@ -31,6 +31,9 @@ TEST_BENCHES += \ spirograph \ uaes +SCIMARK2_URL := https://math.nist.gov/scimark2/scimark2_1c.zip +SCIMARK2_SHA1 := de278c5b8cef84ab6dda41855052c7bfef919e36 + SHELL_HACK := $(shell mkdir 
-p $(BIN_DIR)/linux-x86-softfp $(BIN_DIR)/riscv32) ifeq ($(call has, PREBUILT), 1) @@ -38,33 +41,67 @@ ifeq ($(call has, PREBUILT), 1) else # Since rv32emu only supports the dynamic binary translation of integer instruction in tiered compilation currently, # we disable the hardware floating-point and the related SIMD operation of x86. - CFLAGS := -m32 -mno-sse -mno-sse2 -msoft-float -O2 -L$(BIN_DIR) + CFLAGS := -m32 -mno-sse -mno-sse2 -msoft-float -O2 -Wno-unused-result -L$(BIN_DIR) LDFLAGS := -lsoft-fp -lm - CFLAGS_CROSS := -march=rv32im -mabi=ilp32 -O2 + CFLAGS_CROSS := -march=rv32im -mabi=ilp32 -O2 -Wno-implicit-function-declaration LDFLAGS_CROSS := -lm -lsemihost endif -.PHONY: artifact +.PHONY: artifact scimark2 ieeelib -artifact: +artifact: ieeelib scimark2 ifeq ($(call has, PREBUILT), 1) $(Q)$(PRINTF) "Fetching prebuilt executables from \"rv32emu-prebuilt\" ...\n" $(Q)wget -q --show-progress https://github.com/sysprog21/rv32emu-prebuilt/releases/download/$(LATEST_RELEASE)/rv32emu-prebuilt.tar.gz -O- | tar -C build --strip-components=1 -xz else - git submodule update --init ./src/ieeelib $(addprefix ./tests/,$(foreach tb,$(TEST_SUITES),$(tb))) - $(Q)$(MAKE) -C ./src/ieeelib CC=$(CC) CFLAGS="$(CFLAGS)" BINDIR=$(BIN_DIR) + git submodule update --init $(addprefix ./tests/,$(foreach tb,$(TEST_SUITES),$(tb))) $(Q)for tb in $(TEST_SUITES); do \ CC=$(CC) CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS)" BINDIR=$(BIN_DIR)/linux-x86-softfp $(MAKE) -C ./tests/$$tb; \ done $(Q)for tb in $(TEST_SUITES); do \ CC=$(CROSS_COMPILE)gcc CFLAGS="$(CFLAGS_CROSS)" LDFLAGS="$(LDFLAGS_CROSS)" BINDIR=$(BIN_DIR)/riscv32 $(MAKE) -C ./tests/$$tb; \ done + $(Q)$(PRINTF) "Building standalone testbenches ...\n" $(Q)for tb in $(TEST_BENCHES); do \ - $(CC) $(CFLAGS) -Wno-unused-result -o $(BIN_DIR)/linux-x86-softfp/$$tb ./tests/$$tb.c $(LDFLAGS); \ + $(CC) $(CFLAGS) -o $(BIN_DIR)/linux-x86-softfp/$$tb ./tests/$$tb.c $(LDFLAGS); \ done $(Q)for tb in $(TEST_BENCHES); do \ $(CROSS_COMPILE)gcc 
$(CFLAGS_CROSS) -o $(BIN_DIR)/riscv32/$$tb ./tests/$$tb.c $(LDFLAGS_CROSS); \ done + + git submodule update --init ./tests/doom ./tests/quake + $(Q)$(PRINTF) "Building doom ...\n" + $(Q)$(MAKE) -C ./tests/doom/src/riscv CROSS=$(CROSS_COMPILE) + $(Q)cp ./tests/doom/src/riscv/doom-riscv.elf $(BIN_DIR)/riscv32/doom + $(Q)$(PRINTF) "Building quake ...\n" + $(Q)cd ./tests/quake && mkdir -p build && cd build && \ + cmake -DCMAKE_TOOLCHAIN_FILE=../port/boards/rv32emu/toolchain.cmake \ + -DCROSS_COMPILE=$(CROSS_COMPILE) \ + -DCMAKE_BUILD_TYPE=RELEASE -DBOARD_NAME=rv32emu .. && \ + $(MAKE) + $(Q)cp ./tests/quake/build/port/boards/rv32emu/quake $(BIN_DIR)/riscv32/quake +endif + +scimark2: +ifeq ($(call has, PREBUILT), 0) + $(Q)$(call prologue,"scimark2") + $(Q)$(call download,$(SCIMARK2_URL)) + $(Q)$(call verify,$(SCIMARK2_SHA1),$(notdir $(SCIMARK2_URL))) + $(Q)$(call extract,"./tests/scimark2",$(notdir $(SCIMARK2_URL))) + $(Q)$(call epilogue,$(notdir $(SCIMARK2_URL)),$(SHA1_FILE1),$(SHA1_FILE2)) + $(Q)$(PRINTF) "Building scimark2 ...\n" + $(Q)$(MAKE) -C ./tests/scimark2 clean && $(RM) ./tests/scimark2/scimark2.o + $(Q)$(MAKE) -C ./tests/scimark2 CC=$(CC) CFLAGS="-m32 -O2" + $(Q)cp ./tests/scimark2/scimark2 $(BIN_DIR)/linux-x86-softfp/scimark2 + $(Q)$(MAKE) -C ./tests/scimark2 clean && $(RM) ./tests/scimark2/scimark2.o + $(Q)$(MAKE) -C ./tests/scimark2 CC=$(CROSS_COMPILE)gcc CFLAGS="-march=rv32imf -mabi=ilp32 -O2" + $(Q)cp ./tests/scimark2/scimark2 $(BIN_DIR)/riscv32/scimark2 +endif + +ieeelib: +ifeq ($(call has, PREBUILT), 0) + git submodule update --init ./src/ieeelib + $(Q)$(MAKE) -C ./src/ieeelib CC=$(CC) CFLAGS="$(CFLAGS)" BINDIR=$(BIN_DIR) endif diff --git a/mk/external.mk b/mk/external.mk index 202c2589..a6a94676 100644 --- a/mk/external.mk +++ b/mk/external.mk @@ -25,14 +25,15 @@ define download $(eval _ := $(shell wget -q --show-progress --continue "$(strip $(1))")) endef -# $(1): compressed source(.zip or.gz) +# $(1): destination directory +# $(2): compressed 
source(.zip or.gz) define extract - $(eval COMPRESSED_SUFFIX := $(suffix $(1))) + $(eval COMPRESSED_SUFFIX := $(suffix $(2))) $(eval COMPRESSED_IS_ZIP := $(filter $(COMPRESSED_SUFFIX),.zip)) $(eval _ := \ $(if $(COMPRESSED_IS_ZIP), \ - ($(eval EXTRACTOR := unzip -d $(OUT) $(1))), \ - ($(eval EXTRACTOR := tar -xf $(1) -C $(OUT))) \ + ($(eval EXTRACTOR := unzip -d $(1) $(2))), \ + ($(eval EXTRACTOR := tar -xf $(2) -C $(1))) \ )) $(eval _ := $(shell $(EXTRACTOR))) endef @@ -90,7 +91,7 @@ define download-extract-verify $($(T)_DATA): $(Q)$$(call prologue,$$@) $(Q)$$(call download,$(strip $($(T)_DATA_URL))) - $(Q)$$(call extract,$(notdir $($(T)_DATA_URL))) + $(Q)$$(call extract,$(OUT),$(notdir $($(T)_DATA_URL))) $(Q)$$(call verify,$($(T)_DATA_SHA1), $($(T)_DATA)) $(Q)$$(call epilogue,$(notdir $($(T)_DATA_URL)),$(SHA1_FILE1),$(SHA1_FILE2)) endef diff --git a/tests/doom b/tests/doom new file mode 160000 index 00000000..9b238b8e --- /dev/null +++ b/tests/doom @@ -0,0 +1 @@ +Subproject commit 9b238b8ef747583e3968f23e0a28057446f79523 diff --git a/tests/quake b/tests/quake new file mode 160000 index 00000000..da5c5ac7 --- /dev/null +++ b/tests/quake @@ -0,0 +1 @@ +Subproject commit da5c5ac793f039eb7e7ce349311cca0309acc10f diff --git a/tests/scimark2/FFT.c b/tests/scimark2/FFT.c deleted file mode 100644 index 33cc6481..00000000 --- a/tests/scimark2/FFT.c +++ /dev/null @@ -1,159 +0,0 @@ -#include -#include -#include - -#include "FFT.h" - -#ifndef M_PI -#define M_PI 3.1415926535897932 -#endif - -/*-----------------------------------------------------------------------*/ - -static int int_log2(int n); - -double FFT_num_flops(int N) -{ - double Nd = (double) N; - double logN = (double) int_log2(N); - - return (5.0 * Nd - 2) * logN + 2 * (Nd + 1); -} - -static int int_log2(int n) -{ - int log = 0; - for (int k = 1; k < n; k *= 2, log++) - ; - if (n != (1 << log)) { - printf("FFT: Data length is not a power of 2!: %d ", n); - exit(1); - } - return log; -} - -static void 
FFT_transform_internal(int N, double *data, int direction) -{ - int n = N / 2; - int bit = 0; - int logn; - int dual = 1; - - if (n == 1) - return; /* Identity operation! */ - logn = int_log2(n); - - - if (N == 0) - return; - - /* bit reverse the input data for decimation in time algorithm */ - FFT_bitreverse(N, data); - - /* apply fft recursion */ - /* this loop executed int_log2(N) times */ - for (bit = 0; bit < logn; bit++, dual *= 2) { - double w_real = 1.0; - double w_imag = 0.0; - int a, b; - - double theta = 2.0 * direction * M_PI / (2.0 * (double) dual); - double s = sin(theta); - double t = sin(theta / 2.0); - double s2 = 2.0 * t * t; - - for (a = 0, b = 0; b < n; b += 2 * dual) { - int i = 2 * b; - int j = 2 * (b + dual); - - double wd_real = data[j]; - double wd_imag = data[j + 1]; - - data[j] = data[i] - wd_real; - data[j + 1] = data[i + 1] - wd_imag; - data[i] += wd_real; - data[i + 1] += wd_imag; - } - - /* a = 1 .. (dual-1) */ - for (a = 1; a < dual; a++) { - /* trignometric recurrence for w-> exp(i theta) w */ - { - double tmp_real = w_real - s * w_imag - s2 * w_real; - double tmp_imag = w_imag + s * w_real - s2 * w_imag; - w_real = tmp_real; - w_imag = tmp_imag; - } - for (b = 0; b < n; b += 2 * dual) { - int i = 2 * (b + a); - int j = 2 * (b + a + dual); - - double z1_real = data[j]; - double z1_imag = data[j + 1]; - - double wd_real = w_real * z1_real - w_imag * z1_imag; - double wd_imag = w_real * z1_imag + w_imag * z1_real; - - data[j] = data[i] - wd_real; - data[j + 1] = data[i + 1] - wd_imag; - data[i] += wd_real; - data[i + 1] += wd_imag; - } - } - } -} - -void FFT_bitreverse(int N, double *data) -{ - /* This is the Goldrader bit-reversal algorithm */ - int n = N / 2; - int nm1 = n - 1; - int i = 0; - int j = 0; - for (; i < nm1; i++) { - /*int ii = 2*i; */ - int ii = i << 1; - - /*int jj = 2*j; */ - int jj = j << 1; - - /* int k = n / 2 ; */ - int k = n >> 1; - - if (i < j) { - double tmp_real = data[ii]; - double tmp_imag = data[ii + 1]; - 
data[ii] = data[jj]; - data[ii + 1] = data[jj + 1]; - data[jj] = tmp_real; - data[jj + 1] = tmp_imag; - } - - while (k <= j) { - /*j = j - k ; */ - j -= k; - - /*k = k / 2 ; */ - k >>= 1; - } - j += k; - } -} - -void FFT_transform(int N, double *data) -{ - FFT_transform_internal(N, data, -1); -} - -void FFT_inverse(int N, double *data) -{ - int n = N / 2; - double norm = 0.0; - int i = 0; - FFT_transform_internal(N, data, +1); - - /* Normalize */ - norm = 1 / ((double) n); - for (i = 0; i < N; i++) - data[i] *= norm; -} diff --git a/tests/scimark2/FFT.h b/tests/scimark2/FFT.h deleted file mode 100644 index 932ff75d..00000000 --- a/tests/scimark2/FFT.h +++ /dev/null @@ -1,6 +0,0 @@ -#pragma once - -void FFT_transform(int N, double *data); -void FFT_inverse(int N, double *data); -void FFT_bitreverse(int N, double *data); -double FFT_num_flops(int N); diff --git a/tests/scimark2/LU.c b/tests/scimark2/LU.c deleted file mode 100644 index f2ec57c9..00000000 --- a/tests/scimark2/LU.c +++ /dev/null @@ -1,70 +0,0 @@ -#include "LU.h" -#include - -double LU_num_flops(int N) -{ - /* rougly 2/3*N^3 */ - double Nd = (double) N; - - return (2.0 * Nd * Nd * Nd / 3.0); -} - -int LU_factor(int M, int N, double **A, int *pivot) -{ - int minMN = M < N ? M : N; - - for (int j = 0; j < minMN; j++) { - /* find pivot in column j and test for singularity. 
*/ - int jp = j; - - double t = fabs(A[j][j]); - for (int i = j + 1; i < M; i++) { - double ab = fabs(A[i][j]); - if (ab > t) { - jp = i; - t = ab; - } - } - - pivot[j] = jp; - - /* jp now has the index of maximum element */ - /* of column j, below the diagonal */ - - if (A[jp][j] == 0) - return 1; /* factorization failed because of zero pivot */ - - if (jp != j) { - /* swap rows j and jp */ - double *tA = A[j]; - A[j] = A[jp]; - A[jp] = tA; - } - - if (j < M - 1) { /* compute elements j+1:M of jth column */ - /* note A(j,j), was A(jp,p) previously which was */ - /* guarranteed not to be zero (Label #1) */ - - double recp = 1.0 / A[j][j]; - int k; - for (k = j + 1; k < M; k++) - A[k][j] *= recp; - } - - if (j < minMN - 1) { - /* rank-1 update to trailing submatrix: E = E - x*y; */ - /* E is the region A(j+1:M, j+1:N) */ - /* x is the column vector A(j+1:M,j) */ - /* y is row vector A(j,j+1:N) */ - for (int ii = j + 1; ii < M; ii++) { - double *Aii = A[ii]; - double *Aj = A[j]; - double AiiJ = Aii[j]; - for (int jj = j + 1; jj < N; jj++) - Aii[jj] -= AiiJ * Aj[jj]; - } - } - } - - return 0; -} diff --git a/tests/scimark2/LU.h b/tests/scimark2/LU.h deleted file mode 100644 index 5b5e7ff9..00000000 --- a/tests/scimark2/LU.h +++ /dev/null @@ -1,4 +0,0 @@ -#pragma once - -double LU_num_flops(int N); -int LU_factor(int M, int N, double **A, int *pivot); diff --git a/tests/scimark2/Makefile b/tests/scimark2/Makefile deleted file mode 100644 index 83787341..00000000 --- a/tests/scimark2/Makefile +++ /dev/null @@ -1,27 +0,0 @@ -.PHONY: clean - -include ../../mk/toolchain.mk - -CFLAGS = -march=rv32imf -mabi=ilp32 -CFLAGS += -Wall -Ofast -flto -LDFLAGS = -lm -flto - -.SUFFIXES: .c .o - -%.o: %.c - $(CROSS_COMPILE)gcc $(CFLAGS) -c -o $@ $< - -BIN = scimark2.elf - -all: $(BIN) - -OBJS = \ - FFT.o kernel.o Stopwatch.o Random.o SOR.o SparseCompRow.o \ - array.o MonteCarlo.o LU.o \ - scimark2.o - -$(BIN): $(OBJS) - $(CROSS_COMPILE)gcc $(CFLAGS) -o $@ $^ $(LDFLAGS) - -clean: - rm 
-f $(OBJS) $(BIN) diff --git a/tests/scimark2/MonteCarlo.c b/tests/scimark2/MonteCarlo.c deleted file mode 100644 index 34696e24..00000000 --- a/tests/scimark2/MonteCarlo.c +++ /dev/null @@ -1,57 +0,0 @@ -#include "Random.h" - -/** - Estimate Pi by approximating the area of a circle. - - How: generate N random numbers in the unit square, (0,0) to (1,1) - and see how are within a radius of 1 or less, i.e. -
-
- sqrt(x^2 + y^2) < r
-
- 
- since the radius is 1.0, we can square both sides - and avoid a sqrt() computation: -
-
-    x^2 + y^2 <= 1.0
-
-  
- this area under the curve is (Pi * r^2)/ 4.0, - and the area of the unit of square is 1.0, - so Pi can be approximated by -
-                # points with x^2+y^2 < 1
-     Pi =~      --------------------------  * 4.0
-                     total # points
-
-  
- -*/ - -static const int SEED = 113; - -double MonteCarlo_num_flops(int Num_samples) -{ - /* 3 flops in x^2+y^2 and 1 flop in random routine */ - return ((double) Num_samples) * 4.0; -} - -double MonteCarlo_integrate(int Num_samples) -{ - Random R = new_Random_seed(SEED); - - int under_curve = 0; - - for (int count = 0; count < Num_samples; count++) { - double x = Random_nextDouble(R); - double y = Random_nextDouble(R); - - if (x * x + y * y <= 1.0) - under_curve++; - } - - Random_delete(R); - - return ((double) under_curve / Num_samples) * 4.0; -} diff --git a/tests/scimark2/MonteCarlo.h b/tests/scimark2/MonteCarlo.h deleted file mode 100644 index ea3f4adf..00000000 --- a/tests/scimark2/MonteCarlo.h +++ /dev/null @@ -1,4 +0,0 @@ -#pragma once - -double MonteCarlo_integrate(int Num_samples); -double MonteCarlo_num_flops(int Num_samples); diff --git a/tests/scimark2/README.md b/tests/scimark2/README.md deleted file mode 100644 index d5316ad5..00000000 --- a/tests/scimark2/README.md +++ /dev/null @@ -1,44 +0,0 @@ -# SciMark2 (C version) - -This is an ANSI C version of the SciMark2 benchmark, translated from the -original Java sources. The intent in making this benchmark available in C is -for mainly for performance comparisons. For more information about SciMark, see http://math.nist.gov/scimark. - -Results of this benchmark can be sent to pozo@nist.gov. - -The program is split up into the main driver (`scimark2.c`) and kernel routines. -A sample makefile is included; however, one could simply write - -```shell -cc -o scimark2 -O *.c -``` - -and then run -```shell -./scimark2 -``` - -This produces an output similar to -``` -** ** -** SciMark2 Numeric Benchmark, see http://math.nist.gov/scimark ** -** for details. (Results can be submitted to pozo@nist.gov) ** -** ** -Using 2.00 seconds min time per kenel. 
-Composite Score: 65.56 -FFT Mflops: 63.38 (N=1024) -SOR Mflops: 124.80 (100 x 100) -MonteCarlo: Mflops: 16.05 -Sparse matmult Mflops: 59.15 (N=1000, nz=5000) -LU Mflops: 64.40 (M=100, N=100) -0:29.62 Elapsed, 29.620 user sec, 0.010 sys sec, 100.0% utilization. -``` - -The first SciMark number reported is the composite score, followed by the an -approximate Mflop rate for each kernel. - -To run the "large" version of this benchmark (with data structures -that typically do not fit in cache) use -```shell -./scimark2 -large -``` diff --git a/tests/scimark2/Random.c b/tests/scimark2/Random.c deleted file mode 100644 index 932c4324..00000000 --- a/tests/scimark2/Random.c +++ /dev/null @@ -1,131 +0,0 @@ -#include - -#include "Random.h" - -/* static const int mdig = 32; */ -#define MDIG 32 - -/* static const int one = 1; */ -#define ONE 1 - -static const int m1 = (ONE << (MDIG - 2)) + ((ONE << (MDIG - 2)) - ONE); -static const int m2 = ONE << MDIG / 2; - -/* For mdig = 32 : m1 = 2147483647, m2 = 65536 - * For mdig = 64 : m1 = 9223372036854775807, m2 = 4294967296 - */ - -/* move to initialize() because compiler could not resolve as a constant. */ - -static /*const*/ double dm1; /* = 1.0 / (double) m1; */ - -/* private methods (defined below, but not in Random.h */ - -static void initialize(Random R, int seed); - -Random new_Random_seed(int seed) -{ - Random R = (Random) malloc(sizeof(Random_struct)); - - initialize(R, seed); - R->left = 0.0; - R->right = 1.0; - R->width = 1.0; - - return R; -} - -void Random_delete(Random R) -{ - free(R); -} - -/* Returns the next random number in the sequence. 
*/ -double Random_nextDouble(Random R) -{ - int k; - - int I = R->i; - int J = R->j; - int *m = R->m; - - k = m[I] - m[J]; - if (k < 0) - k += m1; - R->m[J] = k; - - if (I == 0) - I = 16; - else - I--; - R->i = I; - - if (J == 0) - J = 16; - else - J--; - R->j = J; - - return dm1 * (double) k; -} - -/*-------------------------------------------------------------------- - PRIVATE METHODS - ----------------------------------------------------------------- */ - -static void initialize(Random R, int seed) -{ - int jseed, k0, k1, j0, j1, iloop; - - dm1 = 1.0 / (double) m1; - - R->seed = seed; - - if (seed < 0) - seed = -seed; /* seed = abs(seed) */ - jseed = (seed < m1 ? seed : m1); /* jseed = min(seed, m1) */ - if (jseed % 2 == 0) - --jseed; - k0 = 9069 % m2; - k1 = 9069 / m2; - j0 = jseed % m2; - j1 = jseed / m2; - for (iloop = 0; iloop < 17; ++iloop) { - jseed = j0 * k0; - j1 = (jseed / m2 + j0 * k1 + j1 * k0) % (m2 / 2); - j0 = jseed % m2; - R->m[iloop] = j0 + m2 * j1; - } - R->i = 4; - R->j = 16; -} - -double *RandomVector(int N, Random R) -{ - double *x = (double *) malloc(sizeof(double) * N); - - for (int i = 0; i < N; i++) - x[i] = Random_nextDouble(R); - - return x; -} - -double **RandomMatrix(int M, int N, Random R) -{ - /* allocate matrix */ - double **A = (double **) malloc(sizeof(double *) * M); - - if (A == NULL) - return NULL; - - for (int i = 0; i < M; i++) { - A[i] = (double *) malloc(sizeof(double) * N); - if (A[i] == NULL) { - free(A); - return NULL; - } - for (int j = 0; j < N; j++) - A[i][j] = Random_nextDouble(R); - } - return A; -} diff --git a/tests/scimark2/Random.h b/tests/scimark2/Random.h deleted file mode 100644 index f70cbd12..00000000 --- a/tests/scimark2/Random.h +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once - -typedef struct { - int m[17]; - int seed; - int i; /* originally = 4 */ - int j; /* originally = 16 */ - double left; /*= 0.0; */ - double right; /* = 1.0; */ - double width; /* = 1.0; */ -} Random_struct, *Random; - -Random 
new_Random_seed(int seed); -double Random_nextDouble(Random R); -void Random_delete(Random R); -double *RandomVector(int N, Random R); -double **RandomMatrix(int M, int N, Random R); diff --git a/tests/scimark2/SOR.c b/tests/scimark2/SOR.c deleted file mode 100644 index fa2cfc42..00000000 --- a/tests/scimark2/SOR.c +++ /dev/null @@ -1,35 +0,0 @@ -#include "SOR.h" - -double SOR_num_flops(int M, int N, int num_iterations) -{ - double Md = (double) M; - double Nd = (double) N; - double num_iterD = (double) num_iterations; - - return (Md - 1) * (Nd - 1) * num_iterD * 6.0; -} - -void SOR_execute(int M, int N, double omega, double **G, int num_iterations) -{ - double omega_over_four = omega * 0.25; - double one_minus_omega = 1.0 - omega; - - /* update interior points */ - int Mm1 = M - 1; - int Nm1 = N - 1; - double *Gi; - double *Gim1; - double *Gip1; - - for (int p = 0; p < num_iterations; p++) { - for (int i = 1; i < Mm1; i++) { - Gi = G[i]; - Gim1 = G[i - 1]; - Gip1 = G[i + 1]; - for (int j = 1; j < Nm1; j++) - Gi[j] = omega_over_four * - (Gim1[j] + Gip1[j] + Gi[j - 1] + Gi[j + 1]) + - one_minus_omega * Gi[j]; - } - } -} diff --git a/tests/scimark2/SOR.h b/tests/scimark2/SOR.h deleted file mode 100644 index 7ca82756..00000000 --- a/tests/scimark2/SOR.h +++ /dev/null @@ -1,4 +0,0 @@ -#pragma once - -double SOR_num_flops(int M, int N, int num_iterations); -void SOR_execute(int M, int N, double omega, double **G, int num_iterations); diff --git a/tests/scimark2/SparseCompRow.c b/tests/scimark2/SparseCompRow.c deleted file mode 100644 index 0d2280a2..00000000 --- a/tests/scimark2/SparseCompRow.c +++ /dev/null @@ -1,37 +0,0 @@ -/* multiple iterations used to make kernel have roughly same granulairty as - * other Scimark kernels. - */ -double SparseCompRow_num_flops(int N, int nz, int num_iterations) -{ - /* Note that if nz does not divide N evenly, then the actual number of - * nonzeros used is adjusted slightly. 
- */ - int actual_nz = (nz / N) * N; - return ((double) actual_nz) * 2.0 * ((double) num_iterations); -} - -/* computes a matrix-vector multiply with a sparse matrix held in compress-row - * format. If the size of the matrix in MxN with nz nonzeros, then the val[] - * is the nz nonzeros, with its ith entry in column col[i]. The integer vector - * row[] is of size M+1 and row[i] points to the begining of the ith row in - * col[]. - */ -void SparseCompRow_matmult(int M, - double *y, - const double *val, - const int *row, - const int *col, - const double *x, - int NUM_ITERATIONS) -{ - for (int reps = 0; reps < NUM_ITERATIONS; reps++) { - for (int r = 0; r < M; r++) { - double sum = 0.0; - int rowR = row[r]; - int rowRp1 = row[r + 1]; - for (int i = rowR; i < rowRp1; i++) - sum += x[col[i]] * val[i]; - y[r] = sum; - } - } -} diff --git a/tests/scimark2/SparseCompRow.h b/tests/scimark2/SparseCompRow.h deleted file mode 100644 index c1efc067..00000000 --- a/tests/scimark2/SparseCompRow.h +++ /dev/null @@ -1,11 +0,0 @@ -#pragma once - -double SparseCompRow_num_flops(int N, int nz, int num_iterations); - -void SparseCompRow_matmult(int M, - double *y, - double *val, - int *row, - int *col, - double *x, - int NUM_ITERATIONS); diff --git a/tests/scimark2/Stopwatch.c b/tests/scimark2/Stopwatch.c deleted file mode 100644 index 99cd0206..00000000 --- a/tests/scimark2/Stopwatch.c +++ /dev/null @@ -1,60 +0,0 @@ -#include - -#include "Stopwatch.h" - -static double seconds() -{ - return ((double) clock()) / (double) CLOCKS_PER_SEC; -} - -void Stopwtach_reset(Stopwatch Q) -{ - Q->running = false; - Q->last_time = 0.0; - Q->total = 0.0; -} - -Stopwatch new_Stopwatch(void) -{ - Stopwatch S = (Stopwatch) malloc(sizeof(struct Stopwatch)); - if (S == NULL) - return NULL; - - Stopwtach_reset(S); - return S; -} - -void Stopwatch_delete(Stopwatch S) -{ - if (S != NULL) - free(S); -} - -/* Start resets the timer to 0.0; use resume for continued total */ - -void Stopwatch_start(Stopwatch Q) 
-{ - if (!(Q->running)) { - Q->running = true; - Q->total = 0.0; - Q->last_time = seconds(); - } -} - -void Stopwatch_stop(Stopwatch Q) -{ - if (Q->running) { - Q->total += seconds() - Q->last_time; - Q->running = false; - } -} - -double Stopwatch_read(Stopwatch Q) -{ - if (Q->running) { - double t = seconds(); - Q->total += t - Q->last_time; - Q->last_time = t; - } - return Q->total; -} diff --git a/tests/scimark2/Stopwatch.h b/tests/scimark2/Stopwatch.h deleted file mode 100644 index 10852120..00000000 --- a/tests/scimark2/Stopwatch.h +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once - -#include -#include - -struct Stopwatch { - bool running; - double last_time; - double total; -}; -typedef struct Stopwatch *Stopwatch; - -Stopwatch new_Stopwatch(void); -void Stopwtach_reset(Stopwatch Q); -void Stopwatch_delete(Stopwatch S); -void Stopwatch_start(Stopwatch Q); -void Stopwatch_stop(Stopwatch Q); -double Stopwatch_read(Stopwatch Q); diff --git a/tests/scimark2/array.c b/tests/scimark2/array.c deleted file mode 100644 index 6dd60171..00000000 --- a/tests/scimark2/array.c +++ /dev/null @@ -1,63 +0,0 @@ -#include -#include -#include - -#include "array.h" - -double **new_Array2D_double(int M, int N) -{ - int i = 0; - bool failed = false; - - double **A = (double **) malloc(sizeof(double *) * M); - if (A == NULL) - return NULL; - - for (i = 0; i < M; i++) { - A[i] = (double *) malloc(N * sizeof(double)); - if (A[i] == NULL) { - failed = true; - break; - } - } - - /* if we didn't successfully allocate all rows of A */ - /* clean up any allocated memory (i.e. 
go back and free */ - /* previous rows) and return NULL */ - if (failed) { - i--; - for (; i <= 0; i--) - free(A[i]); - free(A); - return NULL; - } - return A; -} -void Array2D_double_delete(int M, int N, double **A) -{ - if (A == NULL) - return; - - for (int i = 0; i < M; i++) - free(A[i]); - - free(A); -} - -void Array2D_double_copy(int M, int N, double **B, double **A) -{ - int remainder = N & 3; /* N mod 4; */ - - for (int i = 0; i < M; i++) { - double *Bi = B[i]; - double *Ai = A[i]; - for (int j = 0; j < remainder; j++) - Bi[j] = Ai[j]; - for (int j = remainder; j < N; j += 4) { - Bi[j] = Ai[j]; - Bi[j + 1] = Ai[j + 1]; - Bi[j + 2] = Ai[j + 2]; - Bi[j + 3] = Ai[j + 3]; - } - } -} diff --git a/tests/scimark2/array.h b/tests/scimark2/array.h deleted file mode 100644 index d9f970d3..00000000 --- a/tests/scimark2/array.h +++ /dev/null @@ -1,5 +0,0 @@ -#pragma once - -double **new_Array2D_double(int M, int N); -void Array2D_double_delete(int M, int N, double **A); -void Array2D_double_copy(int M, int N, double **B, double **A); diff --git a/tests/scimark2/constants.h b/tests/scimark2/constants.h deleted file mode 100644 index 9e65aeb0..00000000 --- a/tests/scimark2/constants.h +++ /dev/null @@ -1,29 +0,0 @@ -#pragma once - -const double RESOLUTION_DEFAULT = 2.0; /* secs (normally 2.0) */ -const int RANDOM_SEED = 101010; - -/* default: small (cache-contained) problem sizes */ -const int FFT_SIZE = 1024; /* must be a power of two */ -const int SOR_SIZE = 100; /* NxN grid */ -const int SPARSE_SIZE_M = 1000; -const int SPARSE_SIZE_nz = 5000; -const int LU_SIZE = 100; - -/* large (out-of-cache) problem sizes */ -const int LG_FFT_SIZE = 1048576; /* must be a power of two */ -const int LG_SOR_SIZE = 1000; /* NxN grid */ -const int LG_SPARSE_SIZE_M = 100000; -const int LG_SPARSE_SIZE_nz = 1000000; -const int LG_LU_SIZE = 1000; - -/* tiny problem sizes (used to mainly to preload network classes */ -/* for applet, so that network download times */ -/* are factored out of 
benchmark.) */ -/* */ -const int TINY_FFT_SIZE = 16; /* must be a power of two */ -const int TINY_SOR_SIZE = 10; /* NxN grid */ -const int TINY_SPARSE_SIZE_M = 10; -const int TINY_SPARSE_SIZE_N = 10; -const int TINY_SPARSE_SIZE_nz = 50; -const int TINY_LU_SIZE = 10; diff --git a/tests/scimark2/kernel.c b/tests/scimark2/kernel.c deleted file mode 100644 index 006c9ae4..00000000 --- a/tests/scimark2/kernel.c +++ /dev/null @@ -1,209 +0,0 @@ -#include -#include - -#include "FFT.h" -#include "LU.h" -#include "MonteCarlo.h" -#include "Random.h" -#include "SOR.h" -#include "SparseCompRow.h" -#include "Stopwatch.h" -#include "array.h" - -double kernel_measureFFT(int N, double mintime, Random R) -{ - /* initialize FFT data as complex (N real/img pairs) */ - int twoN = 2 * N; - double *x = RandomVector(twoN, R); - long cycles = 1; - Stopwatch Q = new_Stopwatch(); - double result = 0.0; - - while (1) { - Stopwatch_start(Q); - for (int i = 0; i < cycles; i++) { - FFT_transform(twoN, x); /* forward transform */ - FFT_inverse(twoN, x); /* backward transform */ - } - Stopwatch_stop(Q); - if (Stopwatch_read(Q) >= mintime) - break; - - cycles *= 2; - } - /* approx Mflops */ - - result = FFT_num_flops(N) * cycles / Stopwatch_read(Q) * 1.0e-6; - Stopwatch_delete(Q); - free(x); - return result; -} - -double kernel_measureSOR(int N, double min_time, Random R) -{ - double **G = RandomMatrix(N, N, R); - double result = 0.0; - - Stopwatch Q = new_Stopwatch(); - int cycles = 1; - while (1) { - Stopwatch_start(Q); - SOR_execute(N, N, 1.25, G, cycles); - Stopwatch_stop(Q); - - if (Stopwatch_read(Q) >= min_time) - break; - - cycles *= 2; - } - /* approx Mflops */ - - result = SOR_num_flops(N, N, cycles) / Stopwatch_read(Q) * 1.0e-6; - Stopwatch_delete(Q); - Array2D_double_delete(N, N, G); - return result; -} - -double kernel_measureMonteCarlo(double min_time, Random R) -{ - double result = 0.0; - Stopwatch Q = new_Stopwatch(); - - int cycles = 1; - while (1) { - Stopwatch_start(Q); - 
MonteCarlo_integrate(cycles); - Stopwatch_stop(Q); - if (Stopwatch_read(Q) >= min_time) - break; - - cycles *= 2; - } - /* approx Mflops */ - result = MonteCarlo_num_flops(cycles) / Stopwatch_read(Q) * 1.0e-6; - Stopwatch_delete(Q); - return result; -} - - -double kernel_measureSparseMatMult(int N, int nz, double min_time, Random R) -{ - /* initialize vector multipliers and storage for result */ - /* y = A*y; */ - - double *x = RandomVector(N, R); - double *y = (double *) malloc(sizeof(double) * N); - - double result = 0.0; - - // initialize square sparse matrix - // - // for this test, we create a sparse matrix with M/nz nonzeros - // per row, with spaced-out evenly between the begining of the - // row to the main diagonal. Thus, the resulting pattern looks - // like - // +-----------------+ - // +* + - // +*** + - // +* * * + - // +** * * + - // +** * * + - // +* * * * + - // +* * * * + - // +* * * * + - // +-----------------+ - // - // (as best reproducible with integer artihmetic) - // Note that the first nr rows will have elements past - // the diagonal. 
- int nr = nz / N; /* average number of nonzeros per row */ - int anz = nr * N; /* _actual_ number of nonzeros */ - - double *val = RandomVector(anz, R); - int *col = (int *) malloc(sizeof(int) * nz); - int *row = (int *) malloc(sizeof(int) * (N + 1)); - int r = 0; - int cycles = 1; - - Stopwatch Q = new_Stopwatch(); - - row[0] = 0; - for (r = 0; r < N; r++) { - /* initialize elements for row r */ - int rowr = row[r]; - int step = r / nr; - int i = 0; - - row[r + 1] = rowr + nr; - if (step < 1) - step = 1; /* take at least unit steps */ - - - for (i = 0; i < nr; i++) - col[rowr + i] = i * step; - } - - while (1) { - Stopwatch_start(Q); - SparseCompRow_matmult(N, y, val, row, col, x, cycles); - Stopwatch_stop(Q); - if (Stopwatch_read(Q) >= min_time) - break; - - cycles *= 2; - } - - /* approx Mflops */ - result = - SparseCompRow_num_flops(N, nz, cycles) / Stopwatch_read(Q) * 1.0e-6; - - Stopwatch_delete(Q); - free(row); - free(col); - free(val); - free(y); - free(x); - - return result; -} - -double kernel_measureLU(int N, double min_time, Random R) -{ - double **A = NULL; - double **lu = NULL; - int *pivot = NULL; - - Stopwatch Q = new_Stopwatch(); - double result = 0.0; - int cycles = 1; - - if ((A = RandomMatrix(N, N, R)) == NULL) - exit(1); - if ((lu = new_Array2D_double(N, N)) == NULL) - exit(1); - if ((pivot = (int *) malloc(N * sizeof(int))) == NULL) - exit(1); - - while (1) { - Stopwatch_start(Q); - for (int i = 0; i < cycles; i++) { - Array2D_double_copy(N, N, lu, A); - LU_factor(N, N, lu, pivot); - } - Stopwatch_stop(Q); - if (Stopwatch_read(Q) >= min_time) - break; - - cycles *= 2; - } - - /* approx Mflops */ - result = LU_num_flops(N) * cycles / Stopwatch_read(Q) * 1.0e-6; - - Stopwatch_delete(Q); - free(pivot); - Array2D_double_delete(N, N, lu); - Array2D_double_delete(N, N, A); - - return result; -} diff --git a/tests/scimark2/kernel.h b/tests/scimark2/kernel.h deleted file mode 100644 index 66ff1198..00000000 --- a/tests/scimark2/kernel.h +++ /dev/null 
@@ -1,10 +0,0 @@ -#pragma once - -double kernel_measureFFT(int FFT_size, double min_time, Random R); -double kernel_measureSOR(int SOR_size, double min_time, Random R); -double kernel_measureMonteCarlo(double min_time, Random R); -double kernel_measureSparseMatMult(int Sparse_size_N, - int Sparse_size_nz, - double min_time, - Random R); -double kernel_measureLU(int LU_size, double min_time, Random R); diff --git a/tests/scimark2/scimark2.c b/tests/scimark2/scimark2.c deleted file mode 100644 index 0e245e21..00000000 --- a/tests/scimark2/scimark2.c +++ /dev/null @@ -1,88 +0,0 @@ -#include -#include -#include - -#include "Random.h" -#include "constants.h" -#include "kernel.h" - -static void print_banner() -{ - printf( - "** **\n"); - printf( - "** SciMark2 Numeric Benchmark, see http://math.nist.gov/scimark **\n"); - printf( - "** for details. (Results can be submitted to pozo@nist.gov) **\n"); - printf( - "** **\n"); -} - -int main(int argc, char *argv[]) -{ - /* default to the (small) cache-contained version */ - double min_time = RESOLUTION_DEFAULT; - - int FFT_size = FFT_SIZE; - int SOR_size = SOR_SIZE; - int Sparse_size_M = SPARSE_SIZE_M; - int Sparse_size_nz = SPARSE_SIZE_nz; - int LU_size = LU_SIZE; - - /* run the benchmark */ - double res[6] = {0.0}; - Random R = new_Random_seed(RANDOM_SEED); - - if (argc > 1) { - int current_arg = 1; - - if (strcmp(argv[1], "-help") == 0 || strcmp(argv[1], "-h") == 0) { - fprintf(stderr, "Usage: [-large] [minimum_time]\n"); - exit(0); - } - - if (strcmp(argv[1], "-large") == 0) { - FFT_size = LG_FFT_SIZE; - SOR_size = LG_SOR_SIZE; - Sparse_size_M = LG_SPARSE_SIZE_M; - Sparse_size_nz = LG_SPARSE_SIZE_nz; - LU_size = LG_LU_SIZE; - - current_arg++; - } - - if (current_arg < argc) { - min_time = atof(argv[current_arg]); - } - } - - print_banner(); - printf("Using %10.2f seconds min time per kenel.\n", min_time); - - res[1] = kernel_measureFFT(FFT_size, min_time, R); - res[2] = kernel_measureSOR(SOR_size, min_time, R); - res[3] = 
kernel_measureMonteCarlo(min_time, R); - res[4] = - kernel_measureSparseMatMult(Sparse_size_M, Sparse_size_nz, min_time, R); - res[5] = kernel_measureLU(LU_size, min_time, R); - - - - res[0] = (res[1] + res[2] + res[3] + res[4] + res[5]) / 5; - - /* print out results */ - printf("Composite Score: %8.2f\n", res[0]); - printf("FFT Mflops: %8.2f (N=%d)\n", res[1], FFT_size); - printf("SOR Mflops: %8.2f (%d x %d)\n", res[2], SOR_size, - SOR_size); - printf("MonteCarlo: Mflops: %8.2f\n", res[3]); - printf("Sparse matmult Mflops: %8.2f (N=%d, nz=%d)\n", res[4], - Sparse_size_M, Sparse_size_nz); - printf("LU Mflops: %8.2f (M=%d, N=%d)\n", res[5], LU_size, - LU_size); - - - Random_delete(R); - - return 0; -}