From 9cd48eaba4071a58980ab8c2beb937ad4d04fbf8 Mon Sep 17 00:00:00 2001 From: Meng-Hung Chen Date: Sat, 8 Feb 2025 13:35:15 +0800 Subject: [PATCH 1/5] Add "src/rv32_jit.c" into repository The file is generated by "tools/gen-jit-template.py" To bring up the Linux Kernel to just-in-time (JIT) compilation, we need to update the memory-related operation with the memory management unit. However, the current "src/rv32_jit.c" was generated by the template. That template reduced the rework for the repeated statements, but also reduced the flexibility and the intuitiveness for bring up the new feature. In this commit, we deprecate that template and just use a regular file for the flexibility. --- .gitignore | 1 - Makefile | 6 +- src/rv32_jit.c | 731 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 733 insertions(+), 5 deletions(-) create mode 100644 src/rv32_jit.c diff --git a/.gitignore b/.gitignore index 2bcecbb5..3a04c433 100644 --- a/.gitignore +++ b/.gitignore @@ -30,5 +30,4 @@ tests/arch-test-target/config.ini tests/arch-test-target/sail_cSim/riscv_sim_RV32 tests/scimark2/ __pycache__/ -src/rv32_jit.c src/minimal_dtb.h diff --git a/Makefile b/Makefile index 643e69c0..4de0d08e 100644 --- a/Makefile +++ b/Makefile @@ -257,9 +257,6 @@ ifeq ($(call has, JIT), 1) $(error JIT mode only supports for x64 and arm64 target currently.) endif -src/rv32_jit.c: - $(Q)tools/gen-jit-template.py $(CFLAGS) > $@ - $(OUT)/jit.o: src/jit.c src/rv32_jit.c $(VECHO) " CC\t$@\n" $(Q)$(CC) -o $@ $(CFLAGS) -c -MMD -MF $@.d $< @@ -409,9 +406,10 @@ endif clean: $(VECHO) "Cleaning... " - $(Q)$(RM) $(BIN) $(OBJS) $(DEV_OBJS) $(BUILD_DTB) $(BUILD_DTB2C) $(HIST_BIN) $(HIST_OBJS) $(deps) $(WEB_FILES) $(CACHE_OUT) src/rv32_jit.c + $(Q)$(RM) $(BIN) $(OBJS) $(DEV_OBJS) $(BUILD_DTB) $(BUILD_DTB2C) $(HIST_BIN) $(HIST_OBJS) $(deps) $(WEB_FILES) $(CACHE_OUT) $(Q)-$(RM) $(SOFTFLOAT_LIB) $(Q)$(call notice, [OK]) + distclean: clean $(VECHO) "Deleting all generated files... 
" $(Q)$(RM) -r $(OUT)/id1 diff --git a/src/rv32_jit.c b/src/rv32_jit.c new file mode 100644 index 00000000..19f43bc2 --- /dev/null +++ b/src/rv32_jit.c @@ -0,0 +1,731 @@ +GEN(nop, {}) +GEN(lui, { + vm_reg[0] = map_vm_reg(state, ir->rd); + emit_load_imm(state, vm_reg[0], ir->imm); +}) +GEN(auipc, { + vm_reg[0] = map_vm_reg(state, ir->rd); + emit_load_imm(state, vm_reg[0], ir->pc + ir->imm); +}) +GEN(jal, { + if (ir->rd) { + vm_reg[0] = map_vm_reg(state, ir->rd); + emit_load_imm(state, vm_reg[0], ir->pc + 4); + } + store_back(state); + emit_jmp(state, ir->pc + ir->imm); + emit_load_imm(state, temp_reg, ir->pc + ir->imm); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); +}) +GEN(jalr, { + vm_reg[0] = ra_load(state, ir->rs1); + emit_mov(state, vm_reg[0], temp_reg); + emit_alu32_imm32(state, 0x81, 0, temp_reg, ir->imm); + emit_alu32_imm32(state, 0x81, 4, temp_reg, ~1U); + if (ir->rd) { + vm_reg[1] = map_vm_reg(state, ir->rd); + emit_load_imm(state, vm_reg[1], ir->pc + 4); + } + store_back(state); + parse_branch_history_table(state, ir); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); +}) +GEN(beq, { + ra_load2(state, ir->rs1, ir->rs2); + emit_cmp32(state, vm_reg[1], vm_reg[0]); + store_back(state); + uint32_t jump_loc = state->offset; + emit_jcc_offset(state, 0x84); + if (ir->branch_untaken) { + emit_jmp(state, ir->pc + 4); + } + emit_load_imm(state, temp_reg, ir->pc + 4); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); + emit_jump_target_offset(state, JUMP_LOC, state->offset); + if (ir->branch_taken) { + emit_jmp(state, ir->pc + ir->imm); + } + emit_load_imm(state, temp_reg, ir->pc + ir->imm); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); +}) +GEN(bne, { + ra_load2(state, ir->rs1, ir->rs2); + emit_cmp32(state, vm_reg[1], vm_reg[0]); + store_back(state); + uint32_t jump_loc = 
state->offset; + emit_jcc_offset(state, 0x85); + if (ir->branch_untaken) { + emit_jmp(state, ir->pc + 4); + } + emit_load_imm(state, temp_reg, ir->pc + 4); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); + emit_jump_target_offset(state, JUMP_LOC, state->offset); + if (ir->branch_taken) { + emit_jmp(state, ir->pc + ir->imm); + } + emit_load_imm(state, temp_reg, ir->pc + ir->imm); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); +}) +GEN(blt, { + ra_load2(state, ir->rs1, ir->rs2); + emit_cmp32(state, vm_reg[1], vm_reg[0]); + store_back(state); + uint32_t jump_loc = state->offset; + emit_jcc_offset(state, 0x8c); + if (ir->branch_untaken) { + emit_jmp(state, ir->pc + 4); + } + emit_load_imm(state, temp_reg, ir->pc + 4); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); + emit_jump_target_offset(state, JUMP_LOC, state->offset); + if (ir->branch_taken) { + emit_jmp(state, ir->pc + ir->imm); + } + emit_load_imm(state, temp_reg, ir->pc + ir->imm); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); +}) +GEN(bge, { + ra_load2(state, ir->rs1, ir->rs2); + emit_cmp32(state, vm_reg[1], vm_reg[0]); + store_back(state); + uint32_t jump_loc = state->offset; + emit_jcc_offset(state, 0x8d); + if (ir->branch_untaken) { + emit_jmp(state, ir->pc + 4); + } + emit_load_imm(state, temp_reg, ir->pc + 4); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); + emit_jump_target_offset(state, JUMP_LOC, state->offset); + if (ir->branch_taken) { + emit_jmp(state, ir->pc + ir->imm); + } + emit_load_imm(state, temp_reg, ir->pc + ir->imm); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); +}) +GEN(bltu, { + ra_load2(state, ir->rs1, ir->rs2); + emit_cmp32(state, vm_reg[1], vm_reg[0]); + store_back(state); + uint32_t jump_loc 
= state->offset; + emit_jcc_offset(state, 0x82); + if (ir->branch_untaken) { + emit_jmp(state, ir->pc + 4); + } + emit_load_imm(state, temp_reg, ir->pc + 4); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); + emit_jump_target_offset(state, JUMP_LOC, state->offset); + if (ir->branch_taken) { + emit_jmp(state, ir->pc + ir->imm); + } + emit_load_imm(state, temp_reg, ir->pc + ir->imm); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); +}) +GEN(bgeu, { + ra_load2(state, ir->rs1, ir->rs2); + emit_cmp32(state, vm_reg[1], vm_reg[0]); + store_back(state); + uint32_t jump_loc = state->offset; + emit_jcc_offset(state, 0x83); + if (ir->branch_untaken) { + emit_jmp(state, ir->pc + 4); + } + emit_load_imm(state, temp_reg, ir->pc + 4); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); + emit_jump_target_offset(state, JUMP_LOC, state->offset); + if (ir->branch_taken) { + emit_jmp(state, ir->pc + ir->imm); + } + emit_load_imm(state, temp_reg, ir->pc + ir->imm); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); +}) +GEN(lb, { + memory_t *m = PRIV(rv)->mem; + vm_reg[0] = ra_load(state, ir->rs1); + emit_load_imm_sext(state, temp_reg, (intptr_t) (m->mem_base + ir->imm)); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = map_vm_reg(state, ir->rd); + emit_load_sext(state, S8, temp_reg, vm_reg[1], 0); +}) +GEN(lh, { + memory_t *m = PRIV(rv)->mem; + vm_reg[0] = ra_load(state, ir->rs1); + emit_load_imm_sext(state, temp_reg, (intptr_t) (m->mem_base + ir->imm)); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = map_vm_reg(state, ir->rd); + emit_load_sext(state, S16, temp_reg, vm_reg[1], 0); +}) +GEN(lw, { + memory_t *m = PRIV(rv)->mem; + vm_reg[0] = ra_load(state, ir->rs1); + emit_load_imm_sext(state, temp_reg, (intptr_t) (m->mem_base + ir->imm)); + emit_alu64(state, 0x01, vm_reg[0], 
temp_reg); + vm_reg[1] = map_vm_reg(state, ir->rd); + emit_load(state, S32, temp_reg, vm_reg[1], 0); +}) +GEN(lbu, { + memory_t *m = PRIV(rv)->mem; + vm_reg[0] = ra_load(state, ir->rs1); + emit_load_imm_sext(state, temp_reg, (intptr_t) (m->mem_base + ir->imm)); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = map_vm_reg(state, ir->rd); + emit_load(state, S8, temp_reg, vm_reg[1], 0); +}) +GEN(lhu, { + memory_t *m = PRIV(rv)->mem; + vm_reg[0] = ra_load(state, ir->rs1); + emit_load_imm_sext(state, temp_reg, (intptr_t) (m->mem_base + ir->imm)); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = map_vm_reg(state, ir->rd); + emit_load(state, S16, temp_reg, vm_reg[1], 0); +}) +GEN(sb, { + memory_t *m = PRIV(rv)->mem; + vm_reg[0] = ra_load(state, ir->rs1); + emit_load_imm_sext(state, temp_reg, (intptr_t) (m->mem_base + ir->imm)); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = ra_load(state, ir->rs2); + emit_store(state, S8, vm_reg[1], temp_reg, 0); +}) +GEN(sh, { + memory_t *m = PRIV(rv)->mem; + vm_reg[0] = ra_load(state, ir->rs1); + emit_load_imm_sext(state, temp_reg, (intptr_t) (m->mem_base + ir->imm)); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = ra_load(state, ir->rs2); + emit_store(state, S16, vm_reg[1], temp_reg, 0); +}) +GEN(sw, { + memory_t *m = PRIV(rv)->mem; + vm_reg[0] = ra_load(state, ir->rs1); + emit_load_imm_sext(state, temp_reg, (intptr_t) (m->mem_base + ir->imm)); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = ra_load(state, ir->rs2); + emit_store(state, S32, vm_reg[1], temp_reg, 0); +}) +GEN(addi, { + vm_reg[0] = ra_load(state, ir->rs1); + vm_reg[1] = map_vm_reg(state, ir->rd); + if (vm_reg[0] != vm_reg[1]) { + emit_mov(state, vm_reg[0], vm_reg[1]); + } + emit_alu32_imm32(state, 0x81, 0, vm_reg[1], ir->imm); +}) +GEN(slti, { + vm_reg[0] = ra_load(state, ir->rs1); + emit_cmp_imm32(state, vm_reg[0], ir->imm); + vm_reg[1] = map_vm_reg(state, ir->rd); + emit_load_imm(state, vm_reg[1], 1); + 
uint32_t jump_loc = state->offset; + emit_jcc_offset(state, 0x8c); + emit_load_imm(state, vm_reg[1], 0); + emit_jump_target_offset(state, JUMP_LOC, state->offset); +}) +GEN(sltiu, { + vm_reg[0] = ra_load(state, ir->rs1); + emit_cmp_imm32(state, vm_reg[0], ir->imm); + vm_reg[1] = map_vm_reg(state, ir->rd); + emit_load_imm(state, vm_reg[1], 1); + uint32_t jump_loc = state->offset; + emit_jcc_offset(state, 0x82); + emit_load_imm(state, vm_reg[1], 0); + emit_jump_target_offset(state, JUMP_LOC, state->offset); +}) +GEN(xori, { + vm_reg[0] = ra_load(state, ir->rs1); + vm_reg[1] = map_vm_reg(state, ir->rd); + if (vm_reg[0] != vm_reg[1]) { + emit_mov(state, vm_reg[0], vm_reg[1]); + } + emit_alu32_imm32(state, 0x81, 6, vm_reg[1], ir->imm); +}) +GEN(ori, { + vm_reg[0] = ra_load(state, ir->rs1); + vm_reg[1] = map_vm_reg(state, ir->rd); + if (vm_reg[0] != vm_reg[1]) { + emit_mov(state, vm_reg[0], vm_reg[1]); + } + emit_alu32_imm32(state, 0x81, 1, vm_reg[1], ir->imm); +}) +GEN(andi, { + vm_reg[0] = ra_load(state, ir->rs1); + vm_reg[1] = map_vm_reg(state, ir->rd); + if (vm_reg[0] != vm_reg[1]) { + emit_mov(state, vm_reg[0], vm_reg[1]); + } + emit_alu32_imm32(state, 0x81, 4, vm_reg[1], ir->imm); +}) +GEN(slli, { + vm_reg[0] = ra_load(state, ir->rs1); + vm_reg[1] = map_vm_reg(state, ir->rd); + if (vm_reg[0] != vm_reg[1]) { + emit_mov(state, vm_reg[0], vm_reg[1]); + } + emit_alu32_imm8(state, 0xc1, 4, vm_reg[1], ir->imm & 0x1f); +}) +GEN(srli, { + vm_reg[0] = ra_load(state, ir->rs1); + vm_reg[1] = map_vm_reg(state, ir->rd); + if (vm_reg[0] != vm_reg[1]) { + emit_mov(state, vm_reg[0], vm_reg[1]); + } + emit_alu32_imm8(state, 0xc1, 5, vm_reg[1], ir->imm & 0x1f); +}) +GEN(srai, { + vm_reg[0] = ra_load(state, ir->rs1); + vm_reg[1] = map_vm_reg(state, ir->rd); + if (vm_reg[0] != vm_reg[1]) { + emit_mov(state, vm_reg[0], vm_reg[1]); + } + emit_alu32_imm8(state, 0xc1, 7, vm_reg[1], ir->imm & 0x1f); +}) +GEN(add, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, 
ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + emit_alu32(state, 0x01, temp_reg, vm_reg[2]); +}) +GEN(sub, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + emit_alu32(state, 0x29, temp_reg, vm_reg[2]); +}) +GEN(sll, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + emit_alu32_imm32(state, 0x81, 4, temp_reg, 0x1f); + emit_alu32(state, 0xd3, 4, vm_reg[2]); +}) +GEN(slt, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_cmp32(state, vm_reg[1], vm_reg[0]); + emit_load_imm(state, vm_reg[2], 1); + uint32_t jump_loc = state->offset; + emit_jcc_offset(state, 0x8c); + emit_load_imm(state, vm_reg[2], 0); + emit_jump_target_offset(state, JUMP_LOC, state->offset); +}) +GEN(sltu, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_cmp32(state, vm_reg[1], vm_reg[0]); + emit_load_imm(state, vm_reg[2], 1); + uint32_t jump_loc = state->offset; + emit_jcc_offset(state, 0x82); + emit_load_imm(state, vm_reg[2], 0); + emit_jump_target_offset(state, JUMP_LOC, state->offset); +}) +GEN(xor, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + emit_alu32(state, 0x31, temp_reg, vm_reg[2]); +}) +GEN(srl, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + emit_alu32_imm32(state, 0x81, 4, temp_reg, 0x1f); + emit_alu32(state, 0xd3, 5, vm_reg[2]); +}) +GEN(sra, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + 
emit_alu32_imm32(state, 0x81, 4, temp_reg, 0x1f); + emit_alu32(state, 0xd3, 7, vm_reg[2]); +}) +GEN(or, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + emit_alu32(state, 0x09, temp_reg, vm_reg[2]); +}) +GEN(and, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + emit_alu32(state, 0x21, temp_reg, vm_reg[2]); +}) +GEN(fence, { assert(NULL); }) +GEN(ecall, { + store_back(state); + emit_load_imm(state, temp_reg, ir->pc); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_call(state, (intptr_t) rv->io.on_ecall); + emit_exit(state); +}) +GEN(ebreak, { + store_back(state); + emit_load_imm(state, temp_reg, ir->pc); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_call(state, (intptr_t) rv->io.on_ebreak); + emit_exit(state); +}) +GEN(wfi, { assert(NULL); }) +GEN(uret, { assert(NULL); }) +GEN(sret, { assert(NULL); }) +GEN(hret, { assert(NULL); }) +GEN(mret, { assert(NULL); }) +GEN(sfencevma, { assert(NULL); }) +GEN(fencei, { assert(NULL); }) +GEN(csrrw, { assert(NULL); }) +GEN(csrrs, { assert(NULL); }) +GEN(csrrc, { assert(NULL); }) +GEN(csrrwi, { assert(NULL); }) +GEN(csrrsi, { assert(NULL); }) +GEN(csrrci, { assert(NULL); }) +GEN(mul, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + muldivmod(state, 0x28, temp_reg, vm_reg[2], 0); +}) +GEN(mulh, { + ra_load2_sext(state, ir->rs1, ir->rs2, true, true); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + muldivmod(state, 0x2f, temp_reg, vm_reg[2], 0); + emit_alu64_imm8(state, 0xc1, 5, vm_reg[2], 32); +}) +GEN(mulhsu, { + ra_load2_sext(state, ir->rs1, 
ir->rs2, true, false); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + muldivmod(state, 0x2f, temp_reg, vm_reg[2], 0); + emit_alu64_imm8(state, 0xc1, 5, vm_reg[2], 32); +}) +GEN(mulhu, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + muldivmod(state, 0x2f, temp_reg, vm_reg[2], 0); + emit_alu64_imm8(state, 0xc1, 5, vm_reg[2], 32); +}) +GEN(div, { + ra_load2_sext(state, ir->rs1, ir->rs2, true, true); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + muldivmod(state, 0x38, temp_reg, vm_reg[2], 1); +}) +GEN(divu, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + muldivmod(state, 0x38, temp_reg, vm_reg[2], 0); +}) +GEN(rem, { + ra_load2_sext(state, ir->rs1, ir->rs2, true, true); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + muldivmod(state, 0x98, temp_reg, vm_reg[2], 1); +}) +GEN(remu, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + muldivmod(state, 0x98, temp_reg, vm_reg[2], 0); +}) +GEN(lrw, { assert(NULL); }) +GEN(scw, { assert(NULL); }) +GEN(amoswapw, { assert(NULL); }) +GEN(amoaddw, { assert(NULL); }) +GEN(amoxorw, { assert(NULL); }) +GEN(amoandw, { assert(NULL); }) +GEN(amoorw, { assert(NULL); }) +GEN(amominw, { assert(NULL); }) +GEN(amomaxw, { assert(NULL); }) +GEN(amominuw, { assert(NULL); }) +GEN(amomaxuw, { assert(NULL); }) +GEN(flw, { assert(NULL); }) +GEN(fsw, { assert(NULL); }) +GEN(fmadds, { assert(NULL); }) +GEN(fmsubs, { assert(NULL); }) +GEN(fnmsubs, { assert(NULL); }) +GEN(fnmadds, { 
assert(NULL); }) +GEN(fadds, { assert(NULL); }) +GEN(fsubs, { assert(NULL); }) +GEN(fmuls, { assert(NULL); }) +GEN(fdivs, { assert(NULL); }) +GEN(fsqrts, { assert(NULL); }) +GEN(fsgnjs, { assert(NULL); }) +GEN(fsgnjns, { assert(NULL); }) +GEN(fsgnjxs, { assert(NULL); }) +GEN(fmins, { assert(NULL); }) +GEN(fmaxs, { assert(NULL); }) +GEN(fcvtws, { assert(NULL); }) +GEN(fcvtwus, { assert(NULL); }) +GEN(fmvxw, { assert(NULL); }) +GEN(feqs, { assert(NULL); }) +GEN(flts, { assert(NULL); }) +GEN(fles, { assert(NULL); }) +GEN(fclasss, { assert(NULL); }) +GEN(fcvtsw, { assert(NULL); }) +GEN(fcvtswu, { assert(NULL); }) +GEN(fmvwx, { assert(NULL); }) +GEN(caddi4spn, { + vm_reg[0] = ra_load(state, rv_reg_sp); + vm_reg[1] = map_vm_reg(state, ir->rd); + if (vm_reg[0] != vm_reg[1]) { + emit_mov(state, vm_reg[0], vm_reg[1]); + } + emit_alu32_imm32(state, 0x81, 0, vm_reg[1], (uint16_t) ir->imm); +}) +GEN(clw, { + memory_t *m = PRIV(rv)->mem; + vm_reg[0] = ra_load(state, ir->rs1); + emit_load_imm_sext(state, temp_reg, (intptr_t) (m->mem_base + ir->imm)); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = map_vm_reg(state, ir->rd); + emit_load(state, S32, temp_reg, vm_reg[1], 0); +}) +GEN(csw, { + memory_t *m = PRIV(rv)->mem; + vm_reg[0] = ra_load(state, ir->rs1); + emit_load_imm_sext(state, temp_reg, (intptr_t) (m->mem_base + ir->imm)); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = ra_load(state, ir->rs2); + emit_store(state, S32, vm_reg[1], temp_reg, 0); +}) +GEN(cnop, {}) +GEN(caddi, { + vm_reg[0] = ra_load(state, ir->rd); + emit_alu32_imm32(state, 0x81, 0, vm_reg[0], (int16_t) ir->imm); +}) +GEN(cjal, { + vm_reg[0] = map_vm_reg(state, rv_reg_ra); + emit_load_imm(state, vm_reg[0], ir->pc + 2); + store_back(state); + emit_jmp(state, ir->pc + ir->imm); + emit_load_imm(state, temp_reg, ir->pc + ir->imm); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); +}) +GEN(cli, { + vm_reg[0] = map_vm_reg(state, ir->rd); + 
emit_load_imm(state, vm_reg[0], ir->imm); +}) +GEN(caddi16sp, { + vm_reg[0] = ra_load(state, ir->rd); + emit_alu32_imm32(state, 0x81, 0, vm_reg[0], ir->imm); +}) +GEN(clui, { + vm_reg[0] = map_vm_reg(state, ir->rd); + emit_load_imm(state, vm_reg[0], ir->imm); +}) +GEN(csrli, { + vm_reg[0] = ra_load(state, ir->rs1); + emit_alu32_imm8(state, 0xc1, 5, vm_reg[0], ir->shamt); +}) +GEN(csrai, { + vm_reg[0] = ra_load(state, ir->rs1); + emit_alu32_imm8(state, 0xc1, 7, vm_reg[0], ir->shamt); +}) +GEN(candi, { + vm_reg[0] = ra_load(state, ir->rs1); + emit_alu32_imm32(state, 0x81, 4, vm_reg[0], ir->imm); +}) +GEN(csub, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + emit_alu32(state, 0x29, temp_reg, vm_reg[2]); +}) +GEN(cxor, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + emit_alu32(state, 0x31, temp_reg, vm_reg[2]); +}) +GEN(cor, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + emit_alu32(state, 0x09, temp_reg, vm_reg[2]); +}) +GEN(cand, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + emit_alu32(state, 0x21, temp_reg, vm_reg[2]); +}) +GEN(cj, { + store_back(state); + emit_jmp(state, ir->pc + ir->imm); + emit_load_imm(state, temp_reg, ir->pc + ir->imm); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); +}) +GEN(cbeqz, { + vm_reg[0] = ra_load(state, ir->rs1); + emit_cmp_imm32(state, vm_reg[0], 0); + store_back(state); + uint32_t jump_loc = state->offset; + emit_jcc_offset(state, 0x84); + if (ir->branch_untaken) { + emit_jmp(state, ir->pc + 2); + } + emit_load_imm(state, temp_reg, 
ir->pc + 2); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); + emit_jump_target_offset(state, JUMP_LOC, state->offset); + if (ir->branch_taken) { + emit_jmp(state, ir->pc + ir->imm); + } + emit_load_imm(state, temp_reg, ir->pc + ir->imm); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); +}) +GEN(cbnez, { + vm_reg[0] = ra_load(state, ir->rs1); + emit_cmp_imm32(state, vm_reg[0], 0); + store_back(state); + uint32_t jump_loc = state->offset; + emit_jcc_offset(state, 0x85); + if (ir->branch_untaken) { + emit_jmp(state, ir->pc + 2); + } + emit_load_imm(state, temp_reg, ir->pc + 2); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); + emit_jump_target_offset(state, JUMP_LOC, state->offset); + if (ir->branch_taken) { + emit_jmp(state, ir->pc + ir->imm); + } + emit_load_imm(state, temp_reg, ir->pc + ir->imm); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); +}) +GEN(cslli, { + vm_reg[0] = ra_load(state, ir->rd); + emit_alu32_imm8(state, 0xc1, 4, vm_reg[0], (uint8_t) ir->imm); +}) +GEN(clwsp, { + memory_t *m = PRIV(rv)->mem; + vm_reg[0] = ra_load(state, rv_reg_sp); + emit_load_imm_sext(state, temp_reg, (intptr_t) (m->mem_base + ir->imm)); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = map_vm_reg(state, ir->rd); + emit_load(state, S32, temp_reg, vm_reg[1], 0); +}) +GEN(cjr, { + vm_reg[0] = ra_load(state, ir->rs1); + emit_mov(state, vm_reg[0], temp_reg); + store_back(state); + parse_branch_history_table(state, ir); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); +}) +GEN(cmv, { + vm_reg[0] = ra_load(state, ir->rs2); + vm_reg[1] = map_vm_reg(state, ir->rd); + if (vm_reg[0] != vm_reg[1]) { + emit_mov(state, vm_reg[0], vm_reg[1]); + } else { + set_dirty(vm_reg[1], true); + } +}) +GEN(cebreak, { + store_back(state); + 
emit_load_imm(state, temp_reg, ir->pc); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_call(state, (intptr_t) rv->io.on_ebreak); + emit_exit(state); +}) +GEN(cjalr, { + vm_reg[0] = ra_load(state, ir->rs1); + emit_mov(state, vm_reg[0], temp_reg); + vm_reg[1] = map_vm_reg(state, rv_reg_ra); + emit_load_imm(state, vm_reg[1], ir->pc + 2); + store_back(state); + parse_branch_history_table(state, ir); + emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); + emit_exit(state); +}) +GEN(cadd, { + ra_load2(state, ir->rs1, ir->rs2); + vm_reg[2] = map_vm_reg(state, ir->rd); + emit_mov(state, vm_reg[1], temp_reg); + emit_mov(state, vm_reg[0], vm_reg[2]); + emit_alu32(state, 0x01, temp_reg, vm_reg[2]); +}) +GEN(cswsp, { + memory_t *m = PRIV(rv)->mem; + vm_reg[0] = ra_load(state, rv_reg_sp); + emit_load_imm_sext(state, temp_reg, (intptr_t) (m->mem_base + ir->imm)); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = ra_load(state, ir->rs2); + emit_store(state, S32, vm_reg[1], temp_reg, 0); +}) +GEN(cflwsp, { assert(NULL); }) +GEN(cfswsp, { assert(NULL); }) +GEN(cflw, { assert(NULL); }) +GEN(cfsw, { assert(NULL); }) +GEN(sh1add, { assert(NULL); }) +GEN(sh2add, { assert(NULL); }) +GEN(sh3add, { assert(NULL); }) +GEN(andn, { assert(NULL); }) +GEN(orn, { assert(NULL); }) +GEN(xnor, { assert(NULL); }) +GEN(clz, { assert(NULL); }) +GEN(ctz, { assert(NULL); }) +GEN(cpop, { assert(NULL); }) +GEN(max, { assert(NULL); }) +GEN(min, { assert(NULL); }) +GEN(maxu, { assert(NULL); }) +GEN(minu, { assert(NULL); }) +GEN(sextb, { assert(NULL); }) +GEN(sexth, { assert(NULL); }) +GEN(zexth, { assert(NULL); }) +GEN(rol, { assert(NULL); }) +GEN(ror, { assert(NULL); }) +GEN(rori, { assert(NULL); }) +GEN(orcb, { assert(NULL); }) +GEN(rev8, { assert(NULL); }) +GEN(clmul, { assert(NULL); }) +GEN(clmulh, { assert(NULL); }) +GEN(clmulr, { assert(NULL); }) +GEN(bclr, { assert(NULL); }) +GEN(bclri, { assert(NULL); }) +GEN(bext, { 
assert(NULL); }) +GEN(bexti, { assert(NULL); }) +GEN(binv, { assert(NULL); }) +GEN(binvi, { assert(NULL); }) +GEN(bset, { assert(NULL); }) +GEN(bseti, { assert(NULL); }) From e90cc11615f75eb3d26e38e39081a1e4d45af940 Mon Sep 17 00:00:00 2001 From: Meng-Hung Chen Date: Sat, 8 Feb 2025 14:07:17 +0800 Subject: [PATCH 2/5] Refactor "src/system.c" to enhance reusability --- Makefile | 8 ++- src/system.c | 138 ++-------------------------------------- src/system.h | 175 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 187 insertions(+), 134 deletions(-) create mode 100644 src/system.h diff --git a/Makefile b/Makefile index 4de0d08e..a0fb6f3a 100644 --- a/Makefile +++ b/Makefile @@ -13,6 +13,8 @@ CFLAGS = -std=gnu99 $(OPT_LEVEL) -Wall -Wextra -Werror CFLAGS += -Wno-unused-label CFLAGS += -include src/common.h -Isrc/ +OBJS_EXT := + # In the system test suite, the executable is an ELF file (e.g., MMU). # However, the Linux kernel emulation includes the Image, DT, and # root filesystem (rootfs). Therefore, the test suite needs this @@ -36,6 +38,10 @@ $(call set-feature, LOG_COLOR) ENABLE_SYSTEM ?= 0 $(call set-feature, SYSTEM) +ifeq ($(call has, SYSTEM), 1) + OBJS_EXT += system.o +endif + # Definition that bridges: # Device Tree(initrd, memory range) # src/io.c(memory init) @@ -96,8 +102,6 @@ endif # Disable Intel's Control-flow Enforcement Technology (CET) CFLAGS += $(CFLAGS_NO_CET) -OBJS_EXT := - # Integer Multiplication and Division instructions ENABLE_EXT_M ?= 1 $(call set-feature, EXT_M) diff --git a/src/system.c b/src/system.c index e92d71af..1895740c 100644 --- a/src/system.c +++ b/src/system.c @@ -3,21 +3,11 @@ * "LICENSE" for information on usage and redistribution of this file. */ -#if !RV32_HAS(SYSTEM) -#error "Do not manage to build this file unless you enable system support." 
-#endif - #include -#include "devices/plic.h" -#include "devices/uart.h" -#include "devices/virtio.h" -#include "riscv_private.h" - -#define R 1 -#define W 0 +#include "system.h" -#if RV32_HAS(SYSTEM) && !RV32_HAS(ELF_LOADER) +#if !RV32_HAS(ELF_LOADER) void emu_update_uart_interrupts(riscv_t *rv) { vm_attr_t *attr = PRIV(rv); @@ -29,7 +19,7 @@ void emu_update_uart_interrupts(riscv_t *rv) plic_update_interrupts(attr->plic); } -static void emu_update_vblk_interrupts(riscv_t *rv) +void emu_update_vblk_interrupts(riscv_t *rv) { vm_attr_t *attr = PRIV(rv); if (attr->vblk->interrupt_status) @@ -38,112 +28,6 @@ static void emu_update_vblk_interrupts(riscv_t *rv) attr->plic->active &= ~IRQ_VBLK_BIT; plic_update_interrupts(attr->plic); } -/* - * Linux kernel might create signal frame when returning from trap - * handling, which modifies the SEPC CSR. Thus, the fault instruction - * cannot always redo. For example, invalid memory access causes SIGSEGV. - */ -extern bool need_handle_signal; -#define CHECK_PENDING_SIGNAL(rv, signal_flag) \ - do { \ - signal_flag = (rv->csr_sepc != rv->last_csr_sepc); \ - } while (0) - -#define MMIO_R 1 -#define MMIO_W 0 - -enum SUPPORTED_MMIO { - MMIO_PLIC, - MMIO_UART, - MMIO_VIRTIOBLK, -}; - -/* clang-format off */ -#define MMIO_OP(io, rw) \ - switch(io){ \ - case MMIO_PLIC: \ - IIF(rw)( /* read */ \ - mmio_read_val = plic_read(PRIV(rv)->plic, addr & 0x3FFFFFF); \ - plic_update_interrupts(PRIV(rv)->plic); \ - return mmio_read_val; \ - , /* write */ \ - plic_write(PRIV(rv)->plic, addr & 0x3FFFFFF, val); \ - plic_update_interrupts(PRIV(rv)->plic); \ - return; \ - ) \ - break; \ - case MMIO_UART: \ - IIF(rw)( /* read */ \ - mmio_read_val = u8250_read(PRIV(rv)->uart, addr & 0xFFFFF); \ - emu_update_uart_interrupts(rv); \ - return mmio_read_val; \ - , /* write */ \ - u8250_write(PRIV(rv)->uart, addr & 0xFFFFF, val); \ - emu_update_uart_interrupts(rv); \ - return; \ - ) \ - break; \ - case MMIO_VIRTIOBLK: \ - IIF(rw)( /* read */ \ - mmio_read_val 
= virtio_blk_read(PRIV(rv)->vblk, addr & 0xFFFFF); \ - emu_update_vblk_interrupts(rv); \ - return mmio_read_val; \ - , /* write */ \ - virtio_blk_write(PRIV(rv)->vblk, addr & 0xFFFFF, val); \ - emu_update_vblk_interrupts(rv); \ - return; \ - ) \ - break; \ - default: \ - rv_log_error("Unknown MMIO type %d", io); \ - break; \ - } -/* clang-format on */ - -#define MMIO_READ() \ - do { \ - uint32_t mmio_read_val; \ - if ((addr >> 28) == 0xF) { /* MMIO at 0xF_______ */ \ - /* 256 regions of 1MiB */ \ - switch ((addr >> 20) & MASK(8)) { \ - case 0x0: \ - case 0x2: /* PLIC (0 - 0x3F) */ \ - MMIO_OP(MMIO_PLIC, MMIO_R); \ - break; \ - case 0x40: /* UART */ \ - MMIO_OP(MMIO_UART, MMIO_R); \ - break; \ - case 0x42: /* Virtio-blk */ \ - MMIO_OP(MMIO_VIRTIOBLK, MMIO_R); \ - break; \ - default: \ - __UNREACHABLE; \ - break; \ - } \ - } \ - } while (0) - -#define MMIO_WRITE() \ - do { \ - if ((addr >> 28) == 0xF) { /* MMIO at 0xF_______ */ \ - /* 256 regions of 1MiB */ \ - switch ((addr >> 20) & MASK(8)) { \ - case 0x0: \ - case 0x2: /* PLIC (0 - 0x3F) */ \ - MMIO_OP(MMIO_PLIC, MMIO_W); \ - break; \ - case 0x40: /* UART */ \ - MMIO_OP(MMIO_UART, MMIO_W); \ - break; \ - case 0x42: /* Virtio-blk */ \ - MMIO_OP(MMIO_VIRTIOBLK, MMIO_W); \ - break; \ - default: \ - __UNREACHABLE; \ - break; \ - } \ - } \ - } while (0) #endif static bool ppn_is_valid(riscv_t *rv, uint32_t ppn) @@ -221,8 +105,8 @@ pte_t *mmu_walk(riscv_t *rv, const uint32_t vaddr, uint32_t *level) #define MMU_FAULT_CHECK(op, rv, pte, vaddr, access_bits) \ mmu_##op##_fault_check(rv, pte, vaddr, access_bits) #define MMU_FAULT_CHECK_IMPL(op, pgfault) \ - static bool mmu_##op##_fault_check(riscv_t *rv, pte_t *pte, \ - uint32_t vaddr, uint32_t access_bits) \ + bool mmu_##op##_fault_check(riscv_t *rv, pte_t *pte, uint32_t vaddr, \ + uint32_t access_bits) \ { \ uint32_t scause; \ uint32_t stval = vaddr; \ @@ -285,16 +169,6 @@ MMU_FAULT_CHECK_IMPL(ifetch, pagefault_insn) MMU_FAULT_CHECK_IMPL(read, pagefault_load) 
MMU_FAULT_CHECK_IMPL(write, pagefault_store) -uint32_t ppn; -uint32_t offset; -#define get_ppn_and_offset() \ - do { \ - assert(pte); \ - ppn = *pte >> (RV_PG_SHIFT - 2) << RV_PG_SHIFT; \ - offset = level == 1 ? vaddr & MASK((RV_PG_SHIFT + 10)) \ - : vaddr & MASK(RV_PG_SHIFT); \ - } while (0) - /* The IO handler that operates when the Memory Management Unit (MMU) * is enabled during system emulation is responsible for managing * input/output operations. These callbacks are designed to implement @@ -449,7 +323,7 @@ static void mmu_write_b(riscv_t *rv, const uint32_t vaddr, const uint8_t val) * TODO: dTLB can be introduced here to * cache the gVA to gPA tranlation. */ -static uint32_t mmu_translate(riscv_t *rv, uint32_t vaddr, bool rw) +uint32_t mmu_translate(riscv_t *rv, uint32_t vaddr, bool rw) { if (!rv->csr_satp) return vaddr; diff --git a/src/system.h b/src/system.h new file mode 100644 index 00000000..872c4201 --- /dev/null +++ b/src/system.h @@ -0,0 +1,175 @@ +/* + * rv32emu is freely redistributable under the MIT License. See the file + * "LICENSE" for information on usage and redistribution of this file. + */ + +#pragma once + +#if !RV32_HAS(SYSTEM) +#error "Do not manage to build this file unless you enable system support." 
+#endif + +#include "devices/plic.h" +#include "devices/uart.h" +#include "riscv_private.h" + +#define R 1 +#define W 0 + +#if !RV32_HAS(ELF_LOADER) + +#define MMIO_R 1 +#define MMIO_W 0 + +enum SUPPORTED_MMIO { + MMIO_PLIC, + MMIO_UART, + MMIO_VIRTIOBLK, +}; + +/* clang-format off */ +#define MMIO_OP(io, rw) \ + switch(io){ \ + case MMIO_PLIC: \ + IIF(rw)( /* read */ \ + mmio_read_val = plic_read(PRIV(rv)->plic, addr & 0x3FFFFFF); \ + plic_update_interrupts(PRIV(rv)->plic); \ + return mmio_read_val; \ + , /* write */ \ + plic_write(PRIV(rv)->plic, addr & 0x3FFFFFF, val); \ + plic_update_interrupts(PRIV(rv)->plic); \ + return; \ + ) \ + break; \ + case MMIO_UART: \ + IIF(rw)( /* read */ \ + mmio_read_val = u8250_read(PRIV(rv)->uart, addr & 0xFFFFF); \ + emu_update_uart_interrupts(rv); \ + return mmio_read_val; \ + , /* write */ \ + u8250_write(PRIV(rv)->uart, addr & 0xFFFFF, val); \ + emu_update_uart_interrupts(rv); \ + return; \ + ) \ + break; \ + case MMIO_VIRTIOBLK: \ + IIF(rw)( /* read */ \ + mmio_read_val = virtio_blk_read(PRIV(rv)->vblk, addr & 0xFFFFF); \ + emu_update_vblk_interrupts(rv); \ + return mmio_read_val; \ + , /* write */ \ + virtio_blk_write(PRIV(rv)->vblk, addr & 0xFFFFF, val); \ + emu_update_vblk_interrupts(rv); \ + return; \ + ) \ + break; \ + default: \ + rv_log_error("unknown MMIO type %d\n", io); \ + break; \ + } +/* clang-format on */ + +#define MMIO_READ() \ + do { \ + uint32_t mmio_read_val; \ + if ((addr >> 28) == 0xF) { /* MMIO at 0xF_______ */ \ + /* 256 regions of 1MiB */ \ + switch ((addr >> 20) & MASK(8)) { \ + case 0x0: \ + case 0x2: /* PLIC (0 - 0x3F) */ \ + MMIO_OP(MMIO_PLIC, MMIO_R); \ + break; \ + case 0x40: /* UART */ \ + MMIO_OP(MMIO_UART, MMIO_R); \ + break; \ + case 0x42: /* Virtio-blk */ \ + MMIO_OP(MMIO_VIRTIOBLK, MMIO_R); \ + break; \ + default: \ + __UNREACHABLE; \ + break; \ + } \ + } \ + } while (0) + +#define MMIO_WRITE() \ + do { \ + if ((addr >> 28) == 0xF) { /* MMIO at 0xF_______ */ \ + /* 256 regions of 1MiB */ \ 
+ switch ((addr >> 20) & MASK(8)) { \ + case 0x0: \ + case 0x2: /* PLIC (0 - 0x3F) */ \ + MMIO_OP(MMIO_PLIC, MMIO_W); \ + break; \ + case 0x40: /* UART */ \ + MMIO_OP(MMIO_UART, MMIO_W); \ + break; \ + case 0x42: /* Virtio-blk */ \ + MMIO_OP(MMIO_VIRTIOBLK, MMIO_W); \ + break; \ + default: \ + __UNREACHABLE; \ + break; \ + } \ + } \ + } while (0) + +void emu_update_uart_interrupts(riscv_t *rv); +void emu_update_vblk_interrupts(riscv_t *rv); + +/* + * Linux kernel might create signal frame when returning from trap + * handling, which modifies the SEPC CSR. Thus, the fault instruction + * cannot always redo. For example, invalid memory access causes SIGSEGV. + */ +extern bool need_handle_signal; + +#define CHECK_PENDING_SIGNAL(rv, signal_flag) \ + do { \ + signal_flag = (rv->csr_sepc != rv->last_csr_sepc); \ + } while (0) + +#endif + +/* Walk through page tables and get the corresponding PTE by virtual address if + * exists + * @rv: RISC-V emulator + * @addr: virtual address + * @level: the level of which the PTE is located + * @return: NULL if a not found or fault else the corresponding PTE + */ +uint32_t *mmu_walk(riscv_t *rv, const uint32_t addr, uint32_t *level); + +/* Verify the PTE and generate corresponding faults if needed + * @op: the operation + * @rv: RISC-V emulator + * @pte: to be verified pte + * @addr: the corresponding virtual address to cause fault + * @return: false if a any fault is generated which caused by violating the + * access permission else true + */ +/* FIXME: handle access fault, addr out of range check */ +#define MMU_FAULT_CHECK_DECL(op) \ + bool mmu_##op##_fault_check(riscv_t *rv, uint32_t *pte, uint32_t vaddr, \ + uint32_t access_bits); + +MMU_FAULT_CHECK_DECL(ifetch); +MMU_FAULT_CHECK_DECL(read); +MMU_FAULT_CHECK_DECL(write); + +/* + * TODO: dTLB can be introduced here to + * cache the gVA to gPA tranlation. 
+ */ +uint32_t mmu_translate(riscv_t *rv, uint32_t vaddr, bool rw); + +uint32_t *mmu_walk(riscv_t *rv, const uint32_t addr, uint32_t *level); + +#define get_ppn_and_offset() \ + uint32_t ppn; \ + uint32_t offset; \ + do { \ + ppn = *pte >> (RV_PG_SHIFT - 2) << RV_PG_SHIFT; \ + offset = level == 1 ? vaddr & MASK((RV_PG_SHIFT + 10)) \ + : vaddr & MASK(RV_PG_SHIFT); \ + } while (0) From bc1a451e6927a96ef4abfdc715f86dc489920e4e Mon Sep 17 00:00:00 2001 From: Meng-Hung Chen Date: Wed, 12 Feb 2025 00:51:05 +0800 Subject: [PATCH 3/5] Allow JIT compilation for system simulation This commit introduces "satp" field to the block structure in JIT mode to ensure the block cache is replaced correctly. The MOP fusion and T2C are disabled temporarily. Use the following commands to boot the Linux Kernel: $ make ENABLE_SYSTEM=1 ENABLE_MOP_FUSION=0 ENABLE_JIT=1 ENABLE_T2C=0 $ ./build/rv32emu -k -i -b --- src/decode.h | 3 + src/emulate.c | 45 +++- src/jit.c | 279 ++++++++++++++++++----- src/jit.h | 6 + src/riscv_private.h | 25 ++- src/rv32_jit.c | 526 +++++++++++++++++++++++++++++++++++++------- src/rv32_template.c | 193 +++++++++------- src/utils.c | 18 +- src/utils.h | 36 ++- 9 files changed, 908 insertions(+), 223 deletions(-) diff --git a/src/decode.h b/src/decode.h index a92c7708..b566d86c 100644 --- a/src/decode.h +++ b/src/decode.h @@ -332,6 +332,9 @@ typedef struct { struct rv_insn *target[HISTORY_SIZE]; #else uint32_t times[HISTORY_SIZE]; +#if RV32_HAS(SYSTEM) + uint32_t satp[HISTORY_SIZE]; +#endif #endif } branch_history_table_t; diff --git a/src/emulate.c b/src/emulate.c index 70ec2916..a8c79e9a 100644 --- a/src/emulate.c +++ b/src/emulate.c @@ -863,12 +863,12 @@ static block_t *block_find_or_translate(riscv_t *rv) block_t *next_blk = block_find(map, rv->PC); #else /* lookup the next block in the block cache */ - /* - * The function "cache_get()" gets the cached block by the given "key (PC)". 
- * In system simulation, the returned block might be dropped because it is - * not the one from the current process (by checking SATP CSR register). - */ block_t *next_blk = (block_t *) cache_get(rv->block_cache, rv->PC, true); +#if RV32_HAS(SYSTEM) + /* discard cache if satp is not matched */ + if (next_blk && next_blk->satp != rv->csr_satp) + next_blk = NULL; +#endif #endif if (next_blk) @@ -886,6 +886,14 @@ static block_t *block_find_or_translate(riscv_t *rv) block_translate(rv, next_blk); +#if RV32_HAS(JIT) && RV32_HAS(SYSTEM) + /* + * May be an ifetch fault which changes satp; do not do this + * in "block_alloc()" + */ + next_blk->satp = rv->csr_satp; +#endif + optimize_constant(rv, next_blk); #if RV32_HAS(MOP_FUSION) /* macro operation fusion */ @@ -912,8 +920,6 @@ static block_t *block_find_or_translate(riscv_t *rv) return next_blk; } - list_del_init(&replaced_blk->list); - if (prev == replaced_blk) prev = NULL; @@ -932,6 +938,16 @@ static block_t *block_find_or_translate(riscv_t *rv) if (untaken == replaced_blk_entry) { entry->ir_tail->branch_untaken = NULL; } + + /* update JALR LUT */ + if (!entry->ir_tail->branch_table) { + continue; + } + + /** + * TODO: update all JALR instructions which reference this + * basic block as the destination. + */ } /* free IRs in replaced block */ @@ -945,6 +961,7 @@ static block_t *block_find_or_translate(riscv_t *rv) mpool_free(rv->block_ir_mp, ir); } + list_del_init(&replaced_blk->list); mpool_free(rv->block_mp, replaced_blk); #if RV32_HAS(T2C) pthread_mutex_unlock(&rv->cache_lock); @@ -961,6 +978,10 @@ static block_t *block_find_or_translate(riscv_t *rv) #if RV32_HAS(JIT) && !RV32_HAS(ARCH_TEST) static bool runtime_profiler(riscv_t *rv, block_t *block) { +#if RV32_HAS(SYSTEM) + if (block->satp != rv->csr_satp) + return false; +#endif /* Based on our observations, a significant number of true hotspots are * characterized by high usage frequency and including loop.
Consequently, * we posit that our profiler could effectively identify hotspots using @@ -1053,6 +1074,10 @@ void rv_step(void *arg) /* by now, a block should be available */ assert(block); +#if RV32_HAS(JIT) && RV32_HAS(SYSTEM) + assert(block->satp == rv->csr_satp); +#endif + /* After emulating the previous block, it is determined whether the * branch is taken or not. The IR array of the current block is then * assigned to either the branch_taken or branch_untaken pointer of @@ -1060,7 +1085,11 @@ void rv_step(void *arg) */ #if RV32_HAS(BLOCK_CHAINING) - if (prev) { + if (prev +#if RV32_HAS(JIT) && RV32_HAS(SYSTEM) + && prev->satp == rv->csr_satp +#endif + ) { rv_insn_t *last_ir = prev->ir_tail; /* chain block */ if (!insn_is_unconditional_branch(last_ir->opcode)) { diff --git a/src/jit.c b/src/jit.c index e2cfff55..61b7821a 100644 --- a/src/jit.c +++ b/src/jit.c @@ -46,6 +46,10 @@ #include "riscv_private.h" #include "utils.h" +#if RV32_HAS(SYSTEM) +#include "system.h" +#endif + #define JIT_CLS_MASK 0x07 #define JIT_ALU_OP_MASK 0xf0 #define JIT_CLS_ALU 0x04 @@ -64,7 +68,11 @@ #define MAX_BLOCKS 8192 #define IN_JUMP_THRESHOLD 256 #if defined(__x86_64__) -#define JUMP_LOC jump_loc + 2 +/* indicate where the immediate value is in the emitted jump instruction */ +#define JUMP_LOC_0 jump_loc_0 + 2 +#if RV32_HAS(SYSTEM) +#define JUMP_LOC_1 jump_loc_1 + 1 +#endif /* Special values for target_pc in struct jump */ #define TARGET_PC_EXIT -1U #define TARGET_PC_RETPOLINE -3U @@ -89,7 +97,11 @@ enum x64_reg { }; #elif defined(__aarch64__) -#define JUMP_LOC jump_loc +/* indicate where the immediate value is in the emitted jump instruction */ +#define JUMP_LOC_0 jump_loc_0 +#if RV32_HAS(SYSTEM) +#define JUMP_LOC_1 jump_loc_1 +#endif /* Special values for target_pc in struct jump */ #define TARGET_PC_EXIT ~UINT32_C(0) #define TARGET_PC_ENTER (~UINT32_C(0) & 0x0101) @@ -186,6 +198,7 @@ enum condition { COND_LO, COND_GE = 10, COND_LT = 11, + COND_AL = 14, }; enum { @@ -267,12 
+280,16 @@ static inline void set_dirty(int reg_idx, bool is_dirty) } } -static inline void offset_map_insert(struct jit_state *state, int32_t target_pc) +static inline void offset_map_insert(struct jit_state *state, block_t *block) { - struct offset_map *map_entry = &state->offset_map[state->n_blocks++]; assert(state->n_blocks < MAX_BLOCKS); - map_entry->pc = target_pc; + + struct offset_map *map_entry = &state->offset_map[state->n_blocks++]; + map_entry->pc = block->pc_start; map_entry->offset = state->offset; +#if RV32_HAS(SYSTEM) + map_entry->satp = block->satp; +#endif } #if !defined(__APPLE__) @@ -287,6 +304,10 @@ static void emit_bytes(struct jit_state *state, void *data, uint32_t len) should_flush = true; return; } + if (unlikely(state->n_blocks == MAX_BLOCKS)) { + should_flush = true; + return; + } #if defined(__APPLE__) && defined(__aarch64__) pthread_jit_write_protect_np(false); #endif @@ -377,12 +398,17 @@ static inline void emit_pop(struct jit_state *state, int r) } static inline void emit_jump_target_address(struct jit_state *state, - int32_t target_pc) + int32_t target_pc, + uint32_t target_satp UNUSED) { - struct jump *jump = &state->jumps[state->n_jumps++]; assert(state->n_jumps < MAX_JUMPS); + + struct jump *jump = &state->jumps[state->n_jumps++]; jump->offset_loc = state->offset; jump->target_pc = target_pc; +#if RV32_HAS(SYSTEM) + jump->target_satp = target_satp; +#endif emit4(state, 0); } #elif defined(__aarch64__) @@ -589,12 +615,13 @@ static void update_branch_imm(struct jit_state *state, #endif static inline void emit_jump_target_offset(struct jit_state *state, - uint32_t jump_loc, + uint32_t jump_loc_0, uint32_t jump_state_offset) { - struct jump *jump = &state->jumps[state->n_jumps++]; assert(state->n_jumps < MAX_JUMPS); - jump->offset_loc = jump_loc; + + struct jump *jump = &state->jumps[state->n_jumps++]; + jump->offset_loc = jump_loc_0; jump->target_offset = jump_state_offset; } @@ -794,7 +821,9 @@ static inline void emit_cmp32(struct 
jit_state *state, int src, int dst) static inline void emit_jcc_offset(struct jit_state *state, int code) { #if defined(__x86_64__) - emit1(state, 0x0f); + /* unconditional jump instruction does not have 0x0f prefix */ + if (code != 0xe9) + emit1(state, 0x0f); emit1(state, code); emit4(state, 0); #elif defined(__aarch64__) @@ -817,9 +846,12 @@ static inline void emit_jcc_offset(struct jit_state *state, int code) case 0x83: /* BGEU */ code = COND_HS; break; + case 0xe9: /* AL */ + code = COND_AL; + break; default: + assert(NULL); __UNREACHABLE; - break; } emit_a64(state, BR_Bcond | (0 << 5) | code); #endif @@ -863,8 +895,10 @@ static inline void emit_load(struct jit_state *state, } else if (size == S32) { /* mov */ emit1(state, 0x8b); - } else + } else { + assert(NULL); __UNREACHABLE; + } emit_modrm_and_displacement(state, dst, src, offset); #elif defined(__aarch64__) @@ -879,8 +913,8 @@ static inline void emit_load(struct jit_state *state, emit_loadstore_imm(state, LS_LDRW, dst, src, offset); break; default: + assert(NULL); __UNREACHABLE; - break; } #endif @@ -1078,17 +1112,23 @@ static inline void emit_store(struct jit_state *state, set_dirty(src, false); } -static inline void emit_jmp(struct jit_state *state, uint32_t target_pc) +static inline void emit_jmp(struct jit_state *state, + uint32_t target_pc, + uint32_t target_satp UNUSED) { #if defined(__x86_64__) emit1(state, 0xe9); - emit_jump_target_address(state, target_pc); + emit_jump_target_address(state, target_pc, target_satp); #elif defined(__aarch64__) - struct jump *jump = &state->jumps[state->n_jumps++]; assert(state->n_jumps < MAX_JUMPS); + + struct jump *jump = &state->jumps[state->n_jumps++]; jump->offset_loc = state->offset; jump->target_pc = target_pc; emit_a64(state, UBR_B); +#if RV32_HAS(SYSTEM) + jump->target_satp = target_satp; +#endif #endif } @@ -1127,7 +1167,7 @@ static inline void emit_exit(struct jit_state *state) emit_jump_target_offset(state, state->offset, state->exit_loc); emit4(state, 
0); #elif defined(__aarch64__) - emit_jmp(state, TARGET_PC_EXIT); + emit_jmp(state, TARGET_PC_EXIT, 0); #endif } @@ -1173,7 +1213,7 @@ static void divmod(struct jit_state *state, if (sign) { /* handle overflow */ - uint32_t jump_loc = state->offset; + uint32_t jump_loc_0 = state->offset; emit_jcc_offset(state, 0x85); emit_cmp_imm32(state, rm, -1); if (mod) @@ -1181,7 +1221,7 @@ static void divmod(struct jit_state *state, else emit_load_imm(state, R10, 0x80000000); emit_conditional_move(state, rd, R10, rd, COND_EQ); - emit_jump_target_offset(state, JUMP_LOC, state->offset); + emit_jump_target_offset(state, JUMP_LOC_0, state->offset); } if (!mod) { /* handle dividing zero */ @@ -1282,11 +1322,11 @@ static void muldivmod(struct jit_state *state, emit_pop(state, RCX); /* handle DIV overflow */ emit1(state, 0x9d); /* popfq */ - uint32_t jump_loc = state->offset; + uint32_t jump_loc_0 = state->offset; emit_jcc_offset(state, 0x85); emit_cmp_imm32(state, RCX, 0x80000000); emit_conditional_move(state, RCX, RAX); - emit_jump_target_offset(state, JUMP_LOC, state->offset); + emit_jump_target_offset(state, JUMP_LOC_0, state->offset); } } else { /* Restore dividend to RCX */ @@ -1297,12 +1337,12 @@ static void muldivmod(struct jit_state *state, if (sign) { /* handle REM overflow */ emit1(state, 0x9d); /* popfq */ - uint32_t jump_loc = state->offset; + uint32_t jump_loc_0 = state->offset; emit_jcc_offset(state, 0x85); emit_cmp_imm32(state, RCX, 0x80000000); emit_load_imm(state, RCX, 0); emit_conditional_move(state, RCX, RDX); - emit_jump_target_offset(state, JUMP_LOC, state->offset); + emit_jump_target_offset(state, JUMP_LOC_0, state->offset); } } } @@ -1343,6 +1383,113 @@ static void muldivmod(struct jit_state *state, } #endif /* RV32_HAS(EXT_M) */ +#if RV32_HAS(SYSTEM) +uint32_t jit_mmio_read_wrapper(riscv_t *rv, uint32_t addr) +{ + MMIO_READ(); + __UNREACHABLE; +} + +void jit_mmu_handler(riscv_t *rv, uint32_t vreg_idx) +{ + assert(vreg_idx < 32); + + uint32_t addr; + + if 
(rv->jit_mmu.type == rv_insn_lb || rv->jit_mmu.type == rv_insn_lh || + rv->jit_mmu.type == rv_insn_lbu || rv->jit_mmu.type == rv_insn_lhu || + rv->jit_mmu.type == rv_insn_lw) + addr = rv->io.mem_translate(rv, rv->jit_mmu.vaddr, R); + else + addr = rv->io.mem_translate(rv, rv->jit_mmu.vaddr, W); + + if (addr == rv->jit_mmu.vaddr || addr < PRIV(rv)->mem->mem_size) { + rv->jit_mmu.is_mmio = 0; + rv->jit_mmu.paddr = addr; + return; + } + + uint32_t val; + rv->jit_mmu.is_mmio = 1; + + switch (rv->jit_mmu.type) { + case rv_insn_sb: + val = rv->X[vreg_idx] & 0xff; + MMIO_WRITE(); + break; + case rv_insn_sh: + val = rv->X[vreg_idx] & 0xffff; + MMIO_WRITE(); + break; + case rv_insn_sw: + val = rv->X[vreg_idx]; + MMIO_WRITE(); + break; + case rv_insn_lb: + rv->X[vreg_idx] = (int8_t) jit_mmio_read_wrapper(rv, addr); + break; + case rv_insn_lh: + rv->X[vreg_idx] = (int16_t) jit_mmio_read_wrapper(rv, addr); + break; + case rv_insn_lw: + rv->X[vreg_idx] = jit_mmio_read_wrapper(rv, addr); + break; + case rv_insn_lbu: + rv->X[vreg_idx] = (uint8_t) jit_mmio_read_wrapper(rv, addr); + break; + case rv_insn_lhu: + rv->X[vreg_idx] = (uint16_t) jit_mmio_read_wrapper(rv, addr); + break; + default: + assert(NULL); + __UNREACHABLE; + } +} + +void emit_jit_mmu_handler(struct jit_state *state, uint8_t vreg_idx) +{ + assert(vreg_idx < 32); + +#if defined(__x86_64__) + /* push $rdi */ + emit1(state, 0xff); + emit_modrm(state, 0x3 << 6, 0x6, parameter_reg[0]); + + /* mov $vreg_idx, %rsi */ + emit1(state, 0xbe); + emit4(state, vreg_idx); + + /* call jit_mmu_handler */ + emit_load_imm_sext(state, temp_reg, (uintptr_t) &jit_mmu_handler); + emit1(state, 0xff); + emit_modrm(state, 0x3 << 6, 0x2, temp_reg); + + /* pop rv to $rdi */ + emit1(state, 0x8f); + emit_modrm(state, 0x3 << 6, 0x0, parameter_reg[0]); +#elif defined(__aarch64__) + uint32_t insn; + + /* push rv into stack */ + insn = (0xf81f0fe << 4) | R0; + emit_a64(state, insn); + + /* move vreg_idx into R1 */ + emit_movewide_imm(state, false, 
R1, vreg_idx); + + /* load &jit_mmu_handler */ + emit_movewide_imm(state, true, temp_reg, (uintptr_t) &jit_mmu_handler); + /* blr jit_mmu_handler */ + insn = (0xd63f << 16) | (temp_reg << 5); + emit_a64(state, insn); + + /* pop from stack */ + insn = (0xf84107e << 4) | R0; + emit_a64(state, insn); +#endif +} +#endif + static void prepare_translate(struct jit_state *state) { #if defined(__x86_64__) @@ -1846,7 +1993,9 @@ static void ra_load2_sext(struct jit_state *state, } #endif -void parse_branch_history_table(struct jit_state *state, rv_insn_t *ir) +void parse_branch_history_table(struct jit_state *state, + riscv_t *rv UNUSED, + rv_insn_t *ir) { int max_idx = 0; branch_history_table_t *bt = ir->branch_table; @@ -1857,14 +2006,21 @@ void parse_branch_history_table(struct jit_state *state, rv_insn_t *ir) max_idx = i; } if (bt->PC[max_idx] && bt->times[max_idx] >= IN_JUMP_THRESHOLD) { - save_reg(state, 0); - unmap_vm_reg(0); - emit_load_imm(state, register_map[0].reg_idx, bt->PC[max_idx]); - emit_cmp32(state, temp_reg, register_map[0].reg_idx); - uint32_t jump_loc = state->offset; - emit_jcc_offset(state, 0x85); - emit_jmp(state, bt->PC[max_idx]); - emit_jump_target_offset(state, JUMP_LOC, state->offset); + IIF(RV32_HAS(SYSTEM))(if (bt->satp[max_idx] == rv->csr_satp), ) + { + save_reg(state, 0); + unmap_vm_reg(0); + emit_load_imm(state, register_map[0].reg_idx, bt->PC[max_idx]); + emit_cmp32(state, temp_reg, register_map[0].reg_idx); + uint32_t jump_loc_0 = state->offset; + emit_jcc_offset(state, 0x85); +#if RV32_HAS(SYSTEM) + emit_jmp(state, bt->PC[max_idx], bt->satp[max_idx]); +#else + emit_jmp(state, bt->PC[max_idx], 0); +#endif + emit_jump_target_offset(state, JUMP_LOC_0, state->offset); + } } } @@ -2028,8 +2184,12 @@ static void resolve_jumps(struct jit_state *state) target_loc = jump.offset_loc + sizeof(uint32_t); for (int i = 0; i < state->n_blocks; i++) { if (jump.target_pc == state->offset_map[i].pc) { - target_loc = state->offset_map[i].offset; - break; + 
IIF(RV32_HAS(SYSTEM)) + (if (jump.target_satp == state->offset_map[i].satp), ) + { + target_loc = state->offset_map[i].offset; + break; + } } } } @@ -2050,11 +2210,14 @@ static void translate_chained_block(struct jit_state *state, riscv_t *rv, block_t *block) { - if (set_has(&state->set, block->pc_start)) + if (set_has(&state->set, RV_HASH_KEY(block))) return; - set_add(&state->set, block->pc_start); - offset_map_insert(state, block->pc_start); + if (state->n_blocks == MAX_BLOCKS) + return; + + assert(set_add(&state->set, RV_HASH_KEY(block))); + offset_map_insert(state, block); translate(state, rv, block); if (unlikely(should_flush)) return; @@ -2062,15 +2225,22 @@ static void translate_chained_block(struct jit_state *state, if (ir->branch_untaken && !set_has(&state->set, ir->branch_untaken->pc)) { block_t *block1 = cache_get(rv->block_cache, ir->branch_untaken->pc, false); - if (block1->translatable) - translate_chained_block(state, rv, block1); + if (block1->translatable) { + IIF(RV32_HAS(SYSTEM)) + (if (block1->satp == rv->csr_satp), ) + translate_chained_block(state, rv, block1); + } } if (ir->branch_taken && !set_has(&state->set, ir->branch_taken->pc)) { block_t *block1 = cache_get(rv->block_cache, ir->branch_taken->pc, false); - if (block1->translatable) - translate_chained_block(state, rv, block1); + if (block1->translatable) { + IIF(RV32_HAS(SYSTEM)) + (if (block1->satp == rv->csr_satp), ) + translate_chained_block(state, rv, block1); + } } + branch_history_table_t *bt = ir->branch_table; if (bt) { int max_idx = 0; @@ -2082,10 +2252,16 @@ static void translate_chained_block(struct jit_state *state, } if (bt->PC[max_idx] && bt->times[max_idx] >= IN_JUMP_THRESHOLD && !set_has(&state->set, bt->PC[max_idx])) { - block_t *block1 = - cache_get(rv->block_cache, bt->PC[max_idx], false); - if (block1 && block1->translatable) - translate_chained_block(state, rv, block1); + IIF(RV32_HAS(SYSTEM))(if (bt->satp[max_idx] == rv->csr_satp), ) + { + block_t *block1 = + 
cache_get(rv->block_cache, bt->PC[max_idx], false); + if (block1 && block1->translatable) { + IIF(RV32_HAS(SYSTEM)) + (if (block1->satp == rv->csr_satp), ) + translate_chained_block(state, rv, block1); + } + } } } } @@ -2093,18 +2269,23 @@ static void translate_chained_block(struct jit_state *state, void jit_translate(riscv_t *rv, block_t *block) { struct jit_state *state = rv->jit_state; - if (set_has(&state->set, block->pc_start)) { + if (set_has(&state->set, RV_HASH_KEY(block))) { for (int i = 0; i < state->n_blocks; i++) { - if (block->pc_start == state->offset_map[i].pc) { + if (block->pc_start == state->offset_map[i].pc +#if RV32_HAS(SYSTEM) + && block->satp == state->offset_map[i].satp +#endif + ) { block->offset = state->offset_map[i].offset; block->hot = true; return; } } + assert(NULL); __UNREACHABLE; } restart: - memset(state->jumps, 0, 1024 * sizeof(struct jump)); + memset(state->jumps, 0, MAX_JUMPS * sizeof(struct jump)); state->n_jumps = 0; block->offset = state->offset; translate_chained_block(state, rv, block); diff --git a/src/jit.h b/src/jit.h index 3967a1df..4bbafa2f 100644 --- a/src/jit.h +++ b/src/jit.h @@ -14,11 +14,17 @@ struct jump { uint32_t offset_loc; uint32_t target_pc; uint32_t target_offset; +#if RV32_HAS(SYSTEM) + uint32_t target_satp; +#endif }; struct offset_map { uint32_t pc; uint32_t offset; +#if RV32_HAS(SYSTEM) + uint32_t satp; +#endif }; struct jit_state { diff --git a/src/riscv_private.h b/src/riscv_private.h index c9f36bf4..163aac1c 100644 --- a/src/riscv_private.h +++ b/src/riscv_private.h @@ -90,6 +90,9 @@ typedef struct block { bool translatable; /**< Determine the block has RV32AF insturctions or not */ bool has_loops; /**< Determine the block has loop or not */ +#if RV32_HAS(SYSTEM) + uint32_t satp; +#endif #if RV32_HAS(T2C) bool compiled; /**< The T2C request is enqueued or not */ #endif @@ -119,16 +122,32 @@ void block_map_clear(riscv_t *rv); struct riscv_internal { bool halt; /* indicate whether the core is halted */ 
- /* I/O interface */ - riscv_io_t io; - /* integer registers */ + /* + * Aarch64 encoder only accepts 9 bits signed offset. Do not put this + * structure below the section. + */ riscv_word_t X[N_RV_REGS]; riscv_word_t PC; +#if RV32_HAS(JIT) && RV32_HAS(SYSTEM) + /* + * Aarch64 encoder only accepts 9 bits signed offset. Do not put this + * structure below the section. + */ + struct { + uint32_t is_mmio; /* whether is MMIO or not */ + uint32_t type; /* 0: read, 1: write */ + uint32_t vaddr; + uint32_t paddr; + } jit_mmu; +#endif /* user provided data */ riscv_user_t data; + /* I/O interface */ + riscv_io_t io; + #if RV32_HAS(EXT_F) /* float registers */ riscv_float_t F[32]; diff --git a/src/rv32_jit.c b/src/rv32_jit.c index 19f43bc2..41a28678 100644 --- a/src/rv32_jit.c +++ b/src/rv32_jit.c @@ -13,7 +13,7 @@ GEN(jal, { emit_load_imm(state, vm_reg[0], ir->pc + 4); } store_back(state); - emit_jmp(state, ir->pc + ir->imm); + emit_jmp(state, ir->pc + ir->imm, rv->csr_satp); emit_load_imm(state, temp_reg, ir->pc + ir->imm); emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); emit_exit(state); @@ -28,7 +28,7 @@ GEN(jalr, { emit_load_imm(state, vm_reg[1], ir->pc + 4); } store_back(state); - parse_branch_history_table(state, ir); + parse_branch_history_table(state, rv, ir); emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); emit_exit(state); }) @@ -36,17 +36,17 @@ GEN(beq, { ra_load2(state, ir->rs1, ir->rs2); emit_cmp32(state, vm_reg[1], vm_reg[0]); store_back(state); - uint32_t jump_loc = state->offset; + uint32_t jump_loc_0 = state->offset; emit_jcc_offset(state, 0x84); if (ir->branch_untaken) { - emit_jmp(state, ir->pc + 4); + emit_jmp(state, ir->pc + 4, rv->csr_satp); } emit_load_imm(state, temp_reg, ir->pc + 4); emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); emit_exit(state); - emit_jump_target_offset(state, JUMP_LOC, state->offset); + emit_jump_target_offset(state, JUMP_LOC_0, state->offset); 
if (ir->branch_taken) { - emit_jmp(state, ir->pc + ir->imm); + emit_jmp(state, ir->pc + ir->imm, rv->csr_satp); } emit_load_imm(state, temp_reg, ir->pc + ir->imm); emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); @@ -56,17 +56,17 @@ GEN(bne, { ra_load2(state, ir->rs1, ir->rs2); emit_cmp32(state, vm_reg[1], vm_reg[0]); store_back(state); - uint32_t jump_loc = state->offset; + uint32_t jump_loc_0 = state->offset; emit_jcc_offset(state, 0x85); if (ir->branch_untaken) { - emit_jmp(state, ir->pc + 4); + emit_jmp(state, ir->pc + 4, rv->csr_satp); } emit_load_imm(state, temp_reg, ir->pc + 4); emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); emit_exit(state); - emit_jump_target_offset(state, JUMP_LOC, state->offset); + emit_jump_target_offset(state, JUMP_LOC_0, state->offset); if (ir->branch_taken) { - emit_jmp(state, ir->pc + ir->imm); + emit_jmp(state, ir->pc + ir->imm, rv->csr_satp); } emit_load_imm(state, temp_reg, ir->pc + ir->imm); emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); @@ -76,17 +76,17 @@ GEN(blt, { ra_load2(state, ir->rs1, ir->rs2); emit_cmp32(state, vm_reg[1], vm_reg[0]); store_back(state); - uint32_t jump_loc = state->offset; + uint32_t jump_loc_0 = state->offset; emit_jcc_offset(state, 0x8c); if (ir->branch_untaken) { - emit_jmp(state, ir->pc + 4); + emit_jmp(state, ir->pc + 4, rv->csr_satp); } emit_load_imm(state, temp_reg, ir->pc + 4); emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); emit_exit(state); - emit_jump_target_offset(state, JUMP_LOC, state->offset); + emit_jump_target_offset(state, JUMP_LOC_0, state->offset); if (ir->branch_taken) { - emit_jmp(state, ir->pc + ir->imm); + emit_jmp(state, ir->pc + ir->imm, rv->csr_satp); } emit_load_imm(state, temp_reg, ir->pc + ir->imm); emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); @@ -96,17 +96,17 @@ GEN(bge, { ra_load2(state, ir->rs1, ir->rs2); emit_cmp32(state, vm_reg[1], 
vm_reg[0]); store_back(state); - uint32_t jump_loc = state->offset; + uint32_t jump_loc_0 = state->offset; emit_jcc_offset(state, 0x8d); if (ir->branch_untaken) { - emit_jmp(state, ir->pc + 4); + emit_jmp(state, ir->pc + 4, rv->csr_satp); } emit_load_imm(state, temp_reg, ir->pc + 4); emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); emit_exit(state); - emit_jump_target_offset(state, JUMP_LOC, state->offset); + emit_jump_target_offset(state, JUMP_LOC_0, state->offset); if (ir->branch_taken) { - emit_jmp(state, ir->pc + ir->imm); + emit_jmp(state, ir->pc + ir->imm, rv->csr_satp); } emit_load_imm(state, temp_reg, ir->pc + ir->imm); emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); @@ -116,17 +116,17 @@ GEN(bltu, { ra_load2(state, ir->rs1, ir->rs2); emit_cmp32(state, vm_reg[1], vm_reg[0]); store_back(state); - uint32_t jump_loc = state->offset; + uint32_t jump_loc_0 = state->offset; emit_jcc_offset(state, 0x82); if (ir->branch_untaken) { - emit_jmp(state, ir->pc + 4); + emit_jmp(state, ir->pc + 4, rv->csr_satp); } emit_load_imm(state, temp_reg, ir->pc + 4); emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); emit_exit(state); - emit_jump_target_offset(state, JUMP_LOC, state->offset); + emit_jump_target_offset(state, JUMP_LOC_0, state->offset); if (ir->branch_taken) { - emit_jmp(state, ir->pc + ir->imm); + emit_jmp(state, ir->pc + ir->imm, rv->csr_satp); } emit_load_imm(state, temp_reg, ir->pc + ir->imm); emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); @@ -136,17 +136,17 @@ GEN(bgeu, { ra_load2(state, ir->rs1, ir->rs2); emit_cmp32(state, vm_reg[1], vm_reg[0]); store_back(state); - uint32_t jump_loc = state->offset; + uint32_t jump_loc_0 = state->offset; emit_jcc_offset(state, 0x83); if (ir->branch_untaken) { - emit_jmp(state, ir->pc + 4); + emit_jmp(state, ir->pc + 4, rv->csr_satp); } emit_load_imm(state, temp_reg, ir->pc + 4); emit_store(state, S32, temp_reg, 
parameter_reg[0], offsetof(riscv_t, PC)); emit_exit(state); - emit_jump_target_offset(state, JUMP_LOC, state->offset); + emit_jump_target_offset(state, JUMP_LOC_0, state->offset); if (ir->branch_taken) { - emit_jmp(state, ir->pc + ir->imm); + emit_jmp(state, ir->pc + ir->imm, rv->csr_satp); } emit_load_imm(state, temp_reg, ir->pc + ir->imm); emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); @@ -155,66 +155,412 @@ GEN(bgeu, { GEN(lb, { memory_t *m = PRIV(rv)->mem; vm_reg[0] = ra_load(state, ir->rs1); - emit_load_imm_sext(state, temp_reg, (intptr_t) (m->mem_base + ir->imm)); - emit_alu64(state, 0x01, vm_reg[0], temp_reg); - vm_reg[1] = map_vm_reg(state, ir->rd); - emit_load_sext(state, S8, temp_reg, vm_reg[1], 0); + IIF(RV32_HAS(SYSTEM)) + ( + { + emit_load_imm_sext(state, temp_reg, ir->imm); + emit_alu32(state, 0x01, vm_reg[0], temp_reg); + emit_store(state, S32, temp_reg, parameter_reg[0], + offsetof(riscv_t, jit_mmu.vaddr)); + emit_load_imm(state, temp_reg, rv_insn_lb); + emit_store(state, S32, temp_reg, parameter_reg[0], + offsetof(riscv_t, jit_mmu.type)); + + store_back(state); + emit_jit_mmu_handler(state, ir->rd); + /* clear register mapping */ + reset_reg(); + + /* + * If it's MMIO, assign the read value to host register, otherwise, + * load from memory. 
+ */ + emit_load(state, S32, parameter_reg[0], temp_reg, + offsetof(riscv_t, jit_mmu.is_mmio)); + emit_cmp_imm32(state, temp_reg, 0); + uint32_t jump_loc_0 = state->offset; + emit_jcc_offset(state, 0x84); + vm_reg[1] = map_vm_reg(state, ir->rd); + + emit_load(state, S32, parameter_reg[0], vm_reg[1], + offsetof(riscv_t, X) + 4 * ir->rd); + /* skip regular loading */ + uint64_t jump_loc_1 = state->offset; + emit_jcc_offset(state, 0xe9); + + emit_jump_target_offset(state, JUMP_LOC_0, state->offset); + emit_load(state, S32, parameter_reg[0], vm_reg[0], + offsetof(riscv_t, jit_mmu.paddr)); + emit_load_imm_sext(state, temp_reg, (intptr_t) m->mem_base); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + emit_load_sext(state, S8, temp_reg, vm_reg[1], 0); + emit_jump_target_offset(state, JUMP_LOC_1, state->offset); + }, + { + emit_load_imm_sext(state, temp_reg, + (intptr_t) (m->mem_base + ir->imm)); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = map_vm_reg(state, ir->rd); + emit_load_sext(state, S8, temp_reg, vm_reg[1], 0); + }) }) GEN(lh, { memory_t *m = PRIV(rv)->mem; vm_reg[0] = ra_load(state, ir->rs1); - emit_load_imm_sext(state, temp_reg, (intptr_t) (m->mem_base + ir->imm)); - emit_alu64(state, 0x01, vm_reg[0], temp_reg); - vm_reg[1] = map_vm_reg(state, ir->rd); - emit_load_sext(state, S16, temp_reg, vm_reg[1], 0); + IIF(RV32_HAS(SYSTEM)) + ( + { + emit_load_imm_sext(state, temp_reg, ir->imm); + emit_alu32(state, 0x01, vm_reg[0], temp_reg); + emit_store(state, S32, temp_reg, parameter_reg[0], + offsetof(riscv_t, jit_mmu.vaddr)); + emit_load_imm(state, temp_reg, rv_insn_lh); + emit_store(state, S32, temp_reg, parameter_reg[0], + offsetof(riscv_t, jit_mmu.type)); + + store_back(state); + emit_jit_mmu_handler(state, ir->rd); + /* clear register mapping */ + reset_reg(); + + /* + * If it's MMIO, assign the read value to host register, otherwise, + * load from memory. 
+ */ + emit_load(state, S32, parameter_reg[0], temp_reg, + offsetof(riscv_t, jit_mmu.is_mmio)); + emit_cmp_imm32(state, temp_reg, 0); + uint32_t jump_loc_0 = state->offset; + emit_jcc_offset(state, 0x84); + vm_reg[1] = map_vm_reg(state, ir->rd); + + emit_load(state, S32, parameter_reg[0], vm_reg[1], + offsetof(riscv_t, X) + 4 * ir->rd); + /* skip regular loading */ + uint64_t jump_loc_1 = state->offset; + emit_jcc_offset(state, 0xe9); + + emit_jump_target_offset(state, JUMP_LOC_0, state->offset); + emit_load(state, S32, parameter_reg[0], vm_reg[0], + offsetof(riscv_t, jit_mmu.paddr)); + emit_load_imm_sext(state, temp_reg, (intptr_t) m->mem_base); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + emit_load_sext(state, S16, temp_reg, vm_reg[1], 0); + emit_jump_target_offset(state, JUMP_LOC_1, state->offset); + }, + { + emit_load_imm_sext(state, temp_reg, + (intptr_t) (m->mem_base + ir->imm)); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = map_vm_reg(state, ir->rd); + emit_load_sext(state, S16, temp_reg, vm_reg[1], 0); + }) }) GEN(lw, { memory_t *m = PRIV(rv)->mem; vm_reg[0] = ra_load(state, ir->rs1); - emit_load_imm_sext(state, temp_reg, (intptr_t) (m->mem_base + ir->imm)); - emit_alu64(state, 0x01, vm_reg[0], temp_reg); - vm_reg[1] = map_vm_reg(state, ir->rd); - emit_load(state, S32, temp_reg, vm_reg[1], 0); + IIF(RV32_HAS(SYSTEM)) + ( + { + emit_load_imm_sext(state, temp_reg, ir->imm); + emit_alu32(state, 0x01, vm_reg[0], temp_reg); + emit_store(state, S32, temp_reg, parameter_reg[0], + offsetof(riscv_t, jit_mmu.vaddr)); + emit_load_imm(state, temp_reg, rv_insn_lw); + emit_store(state, S32, temp_reg, parameter_reg[0], + offsetof(riscv_t, jit_mmu.type)); + + store_back(state); + emit_jit_mmu_handler(state, ir->rd); + /* clear register mapping */ + reset_reg(); + + /* + * If it's MMIO, assign the read value to host register, otherwise, + * load from memory. 
+ */ + emit_load(state, S32, parameter_reg[0], temp_reg, + offsetof(riscv_t, jit_mmu.is_mmio)); + emit_cmp_imm32(state, temp_reg, 0); + uint32_t jump_loc_0 = state->offset; + emit_jcc_offset(state, 0x84); + vm_reg[1] = map_vm_reg(state, ir->rd); + + emit_load(state, S32, parameter_reg[0], vm_reg[1], + offsetof(riscv_t, X) + 4 * ir->rd); + /* skip regular loading */ + uint64_t jump_loc_1 = state->offset; + emit_jcc_offset(state, 0xe9); + + emit_jump_target_offset(state, JUMP_LOC_0, state->offset); + emit_load(state, S32, parameter_reg[0], vm_reg[0], + offsetof(riscv_t, jit_mmu.paddr)); + emit_load_imm_sext(state, temp_reg, (intptr_t) m->mem_base); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + emit_load(state, S32, temp_reg, vm_reg[1], 0); + emit_jump_target_offset(state, JUMP_LOC_1, state->offset); + }, + { + emit_load_imm_sext(state, temp_reg, + (intptr_t) (m->mem_base + ir->imm)); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = map_vm_reg(state, ir->rd); + emit_load(state, S32, temp_reg, vm_reg[1], 0); + }) }) GEN(lbu, { memory_t *m = PRIV(rv)->mem; vm_reg[0] = ra_load(state, ir->rs1); - emit_load_imm_sext(state, temp_reg, (intptr_t) (m->mem_base + ir->imm)); - emit_alu64(state, 0x01, vm_reg[0], temp_reg); - vm_reg[1] = map_vm_reg(state, ir->rd); - emit_load(state, S8, temp_reg, vm_reg[1], 0); + IIF(RV32_HAS(SYSTEM)) + ( + { + emit_load_imm_sext(state, temp_reg, ir->imm); + emit_alu32(state, 0x01, vm_reg[0], temp_reg); + emit_store(state, S32, temp_reg, parameter_reg[0], + offsetof(riscv_t, jit_mmu.vaddr)); + emit_load_imm(state, temp_reg, rv_insn_lbu); + emit_store(state, S32, temp_reg, parameter_reg[0], + offsetof(riscv_t, jit_mmu.type)); + + store_back(state); + emit_jit_mmu_handler(state, ir->rd); + /* clear register mapping */ + reset_reg(); + + /* + * If it's MMIO, assign the read value to host register, otherwise, + * load from memory. 
+ */ + emit_load(state, S32, parameter_reg[0], temp_reg, + offsetof(riscv_t, jit_mmu.is_mmio)); + emit_cmp_imm32(state, temp_reg, 0); + uint32_t jump_loc_0 = state->offset; + emit_jcc_offset(state, 0x84); + vm_reg[1] = map_vm_reg(state, ir->rd); + + emit_load(state, S32, parameter_reg[0], vm_reg[1], + offsetof(riscv_t, X) + 4 * ir->rd); + /* skip regular loading */ + uint64_t jump_loc_1 = state->offset; + emit_jcc_offset(state, 0xe9); + + emit_jump_target_offset(state, JUMP_LOC_0, state->offset); + emit_load(state, S32, parameter_reg[0], vm_reg[0], + offsetof(riscv_t, jit_mmu.paddr)); + emit_load_imm_sext(state, temp_reg, (intptr_t) m->mem_base); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + emit_load(state, S8, temp_reg, vm_reg[1], 0); + emit_jump_target_offset(state, JUMP_LOC_1, state->offset); + }, + { + emit_load_imm_sext(state, temp_reg, + (intptr_t) (m->mem_base + ir->imm)); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = map_vm_reg(state, ir->rd); + emit_load(state, S8, temp_reg, vm_reg[1], 0); + }) }) GEN(lhu, { memory_t *m = PRIV(rv)->mem; vm_reg[0] = ra_load(state, ir->rs1); - emit_load_imm_sext(state, temp_reg, (intptr_t) (m->mem_base + ir->imm)); - emit_alu64(state, 0x01, vm_reg[0], temp_reg); - vm_reg[1] = map_vm_reg(state, ir->rd); - emit_load(state, S16, temp_reg, vm_reg[1], 0); + IIF(RV32_HAS(SYSTEM)) + ( + { + emit_load_imm_sext(state, temp_reg, ir->imm); + emit_alu32(state, 0x01, vm_reg[0], temp_reg); + emit_store(state, S32, temp_reg, parameter_reg[0], + offsetof(riscv_t, jit_mmu.vaddr)); + emit_load_imm(state, temp_reg, rv_insn_lhu); + emit_store(state, S32, temp_reg, parameter_reg[0], + offsetof(riscv_t, jit_mmu.type)); + + store_back(state); + emit_jit_mmu_handler(state, ir->rd); + /* clear register mapping */ + reset_reg(); + + /* + * If it's MMIO, assign the read value to host register, otherwise, + * load from memory. 
+ */ + emit_load(state, S32, parameter_reg[0], temp_reg, + offsetof(riscv_t, jit_mmu.is_mmio)); + emit_cmp_imm32(state, temp_reg, 0); + uint32_t jump_loc_0 = state->offset; + emit_jcc_offset(state, 0x84); + vm_reg[1] = map_vm_reg(state, ir->rd); + + emit_load(state, S32, parameter_reg[0], vm_reg[1], + offsetof(riscv_t, X) + 4 * ir->rd); + /* skip regular loading */ + uint64_t jump_loc_1 = state->offset; + emit_jcc_offset(state, 0xe9); + + emit_jump_target_offset(state, JUMP_LOC_0, state->offset); + emit_load(state, S32, parameter_reg[0], vm_reg[0], + offsetof(riscv_t, jit_mmu.paddr)); + emit_load_imm_sext(state, temp_reg, (intptr_t) m->mem_base); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + emit_load(state, S16, temp_reg, vm_reg[1], 0); + emit_jump_target_offset(state, JUMP_LOC_1, state->offset); + }, + { + emit_load_imm_sext(state, temp_reg, + (intptr_t) (m->mem_base + ir->imm)); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = map_vm_reg(state, ir->rd); + emit_load(state, S16, temp_reg, vm_reg[1], 0); + }) }) GEN(sb, { memory_t *m = PRIV(rv)->mem; vm_reg[0] = ra_load(state, ir->rs1); - emit_load_imm_sext(state, temp_reg, (intptr_t) (m->mem_base + ir->imm)); - emit_alu64(state, 0x01, vm_reg[0], temp_reg); - vm_reg[1] = ra_load(state, ir->rs2); - emit_store(state, S8, vm_reg[1], temp_reg, 0); + IIF(RV32_HAS(SYSTEM)) + ( + { + emit_load_imm_sext(state, temp_reg, ir->imm); + emit_alu32(state, 0x01, vm_reg[0], temp_reg); + emit_store(state, S32, temp_reg, parameter_reg[0], + offsetof(riscv_t, jit_mmu.vaddr)); + emit_load_imm(state, temp_reg, rv_insn_sb); + emit_store(state, S32, temp_reg, parameter_reg[0], + offsetof(riscv_t, jit_mmu.type)); + store_back(state); + emit_jit_mmu_handler(state, ir->rs2); + /* clear register mapping */ + reset_reg(); + + /* + * If it's MMIO, it does not need to do the storing since it has + * been done in the mmio handler, otherwise, store the value into + * memory. 
+ */ + emit_load(state, S32, parameter_reg[0], temp_reg, + offsetof(riscv_t, jit_mmu.is_mmio)); + emit_cmp_imm32(state, temp_reg, 1); + uint32_t jump_loc_0 = state->offset; + emit_jcc_offset(state, 0x84); + + emit_load(state, S32, parameter_reg[0], vm_reg[0], + offsetof(riscv_t, jit_mmu.paddr)); + emit_load_imm_sext(state, temp_reg, (intptr_t) m->mem_base); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = ra_load(state, ir->rs2); + emit_store(state, S8, vm_reg[1], temp_reg, 0); + emit_jump_target_offset(state, JUMP_LOC_0, state->offset); + /* + * Clear register mapping since we do not ensure operand "ir->rs2" + * is loaded or not. + */ + reset_reg(); + }, + { + emit_load_imm_sext(state, temp_reg, + (intptr_t) (m->mem_base + ir->imm)); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = ra_load(state, ir->rs2); + emit_store(state, S8, vm_reg[1], temp_reg, 0); + }) }) GEN(sh, { memory_t *m = PRIV(rv)->mem; vm_reg[0] = ra_load(state, ir->rs1); - emit_load_imm_sext(state, temp_reg, (intptr_t) (m->mem_base + ir->imm)); - emit_alu64(state, 0x01, vm_reg[0], temp_reg); - vm_reg[1] = ra_load(state, ir->rs2); - emit_store(state, S16, vm_reg[1], temp_reg, 0); + IIF(RV32_HAS(SYSTEM)) + ( + { + emit_load_imm_sext(state, temp_reg, ir->imm); + emit_alu32(state, 0x01, vm_reg[0], temp_reg); + emit_store(state, S32, temp_reg, parameter_reg[0], + offsetof(riscv_t, jit_mmu.vaddr)); + emit_load_imm(state, temp_reg, rv_insn_sh); + emit_store(state, S32, temp_reg, parameter_reg[0], + offsetof(riscv_t, jit_mmu.type)); + store_back(state); + emit_jit_mmu_handler(state, ir->rs2); + /* clear register mapping */ + reset_reg(); + + /* + * If it's MMIO, it does not need to do the storing since it has + * been done in the mmio handler, otherwise, store the value into + * memory. 
+ */ + emit_load(state, S32, parameter_reg[0], temp_reg, + offsetof(riscv_t, jit_mmu.is_mmio)); + emit_cmp_imm32(state, temp_reg, 1); + uint32_t jump_loc_0 = state->offset; + emit_jcc_offset(state, 0x84); + + emit_load(state, S32, parameter_reg[0], vm_reg[0], + offsetof(riscv_t, jit_mmu.paddr)); + emit_load_imm_sext(state, temp_reg, (intptr_t) m->mem_base); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = ra_load(state, ir->rs2); + emit_store(state, S16, vm_reg[1], temp_reg, 0); + emit_jump_target_offset(state, JUMP_LOC_0, state->offset); + /* + * Clear register mapping since we do not ensure operand "ir->rs2" + * is loaded or not. + */ + reset_reg(); + }, + { + emit_load_imm_sext(state, temp_reg, + (intptr_t) (m->mem_base + ir->imm)); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = ra_load(state, ir->rs2); + emit_store(state, S16, vm_reg[1], temp_reg, 0); + }) }) GEN(sw, { memory_t *m = PRIV(rv)->mem; vm_reg[0] = ra_load(state, ir->rs1); - emit_load_imm_sext(state, temp_reg, (intptr_t) (m->mem_base + ir->imm)); - emit_alu64(state, 0x01, vm_reg[0], temp_reg); - vm_reg[1] = ra_load(state, ir->rs2); - emit_store(state, S32, vm_reg[1], temp_reg, 0); + IIF(RV32_HAS(SYSTEM)) + ( + { + emit_load_imm_sext(state, temp_reg, ir->imm); + emit_alu32(state, 0x01, vm_reg[0], temp_reg); + emit_store(state, S32, temp_reg, parameter_reg[0], + offsetof(riscv_t, jit_mmu.vaddr)); + emit_load_imm(state, temp_reg, rv_insn_sw); + emit_store(state, S32, temp_reg, parameter_reg[0], + offsetof(riscv_t, jit_mmu.type)); + store_back(state); + emit_jit_mmu_handler(state, ir->rs2); + /* clear register mapping */ + reset_reg(); + + /* + * If it's MMIO, it does not need to do the storing since it has + * been done in the mmio handler, otherwise, store the value into + * memory. 
+ */ + emit_load(state, S32, parameter_reg[0], temp_reg, + offsetof(riscv_t, jit_mmu.is_mmio)); + emit_cmp_imm32(state, temp_reg, 1); + uint32_t jump_loc_0 = state->offset; + emit_jcc_offset(state, 0x84); + + emit_load(state, S32, parameter_reg[0], vm_reg[0], + offsetof(riscv_t, jit_mmu.paddr)); + emit_load_imm_sext(state, temp_reg, (intptr_t) m->mem_base); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = ra_load(state, ir->rs2); + emit_store(state, S32, vm_reg[1], temp_reg, 0); + emit_jump_target_offset(state, JUMP_LOC_0, state->offset); + /* + * Clear register mapping since we do not ensure operand "ir->rs2" + * is loaded into host register "vm_reg[1]" or not. + */ + reset_reg(); + }, + { + emit_load_imm_sext(state, temp_reg, + (intptr_t) (m->mem_base + ir->imm)); + emit_alu64(state, 0x01, vm_reg[0], temp_reg); + vm_reg[1] = ra_load(state, ir->rs2); + emit_store(state, S32, vm_reg[1], temp_reg, 0); + }) }) GEN(addi, { vm_reg[0] = ra_load(state, ir->rs1); @@ -229,20 +575,20 @@ GEN(slti, { emit_cmp_imm32(state, vm_reg[0], ir->imm); vm_reg[1] = map_vm_reg(state, ir->rd); emit_load_imm(state, vm_reg[1], 1); - uint32_t jump_loc = state->offset; + uint32_t jump_loc_0 = state->offset; emit_jcc_offset(state, 0x8c); emit_load_imm(state, vm_reg[1], 0); - emit_jump_target_offset(state, JUMP_LOC, state->offset); + emit_jump_target_offset(state, JUMP_LOC_0, state->offset); }) GEN(sltiu, { vm_reg[0] = ra_load(state, ir->rs1); emit_cmp_imm32(state, vm_reg[0], ir->imm); vm_reg[1] = map_vm_reg(state, ir->rd); emit_load_imm(state, vm_reg[1], 1); - uint32_t jump_loc = state->offset; + uint32_t jump_loc_0 = state->offset; emit_jcc_offset(state, 0x82); emit_load_imm(state, vm_reg[1], 0); - emit_jump_target_offset(state, JUMP_LOC, state->offset); + emit_jump_target_offset(state, JUMP_LOC_0, state->offset); }) GEN(xori, { vm_reg[0] = ra_load(state, ir->rs1); @@ -319,20 +665,20 @@ GEN(slt, { vm_reg[2] = map_vm_reg(state, ir->rd); emit_cmp32(state, vm_reg[1], vm_reg[0]); 
emit_load_imm(state, vm_reg[2], 1); - uint32_t jump_loc = state->offset; + uint32_t jump_loc_0 = state->offset; emit_jcc_offset(state, 0x8c); emit_load_imm(state, vm_reg[2], 0); - emit_jump_target_offset(state, JUMP_LOC, state->offset); + emit_jump_target_offset(state, JUMP_LOC_0, state->offset); }) GEN(sltu, { ra_load2(state, ir->rs1, ir->rs2); vm_reg[2] = map_vm_reg(state, ir->rd); emit_cmp32(state, vm_reg[1], vm_reg[0]); emit_load_imm(state, vm_reg[2], 1); - uint32_t jump_loc = state->offset; + uint32_t jump_loc_0 = state->offset; emit_jcc_offset(state, 0x82); emit_load_imm(state, vm_reg[2], 0); - emit_jump_target_offset(state, JUMP_LOC, state->offset); + emit_jump_target_offset(state, JUMP_LOC_0, state->offset); }) GEN(xor, { ra_load2(state, ir->rs1, ir->rs2); @@ -388,17 +734,24 @@ GEN(ebreak, { }) GEN(wfi, { assert(NULL); }) GEN(uret, { assert(NULL); }) +#if RV32_HAS(SYSTEM) GEN(sret, { assert(NULL); }) +#endif GEN(hret, { assert(NULL); }) GEN(mret, { assert(NULL); }) GEN(sfencevma, { assert(NULL); }) +#if RV32_HAS(Zifencei) /* RV32 Zifencei Standard Extension */ GEN(fencei, { assert(NULL); }) +#endif +#if RV32_HAS(Zicsr) /* RV32 Zicsr Standard Extension */ GEN(csrrw, { assert(NULL); }) GEN(csrrs, { assert(NULL); }) GEN(csrrc, { assert(NULL); }) GEN(csrrwi, { assert(NULL); }) GEN(csrrsi, { assert(NULL); }) GEN(csrrci, { assert(NULL); }) +#endif +#if RV32_HAS(EXT_M) GEN(mul, { ra_load2(state, ir->rs1, ir->rs2); vm_reg[2] = map_vm_reg(state, ir->rd); @@ -458,6 +811,8 @@ GEN(remu, { emit_mov(state, vm_reg[0], vm_reg[2]); muldivmod(state, 0x98, temp_reg, vm_reg[2], 0); }) +#endif +#if RV32_HAS(EXT_A) GEN(lrw, { assert(NULL); }) GEN(scw, { assert(NULL); }) GEN(amoswapw, { assert(NULL); }) @@ -469,6 +824,8 @@ GEN(amominw, { assert(NULL); }) GEN(amomaxw, { assert(NULL); }) GEN(amominuw, { assert(NULL); }) GEN(amomaxuw, { assert(NULL); }) +#endif +#if RV32_HAS(EXT_F) GEN(flw, { assert(NULL); }) GEN(fsw, { assert(NULL); }) GEN(fmadds, { assert(NULL); }) @@ -495,6 
+852,8 @@ GEN(fclasss, { assert(NULL); }) GEN(fcvtsw, { assert(NULL); }) GEN(fcvtswu, { assert(NULL); }) GEN(fmvwx, { assert(NULL); }) +#endif +#if RV32_HAS(EXT_C) GEN(caddi4spn, { vm_reg[0] = ra_load(state, rv_reg_sp); vm_reg[1] = map_vm_reg(state, ir->rd); @@ -528,7 +887,7 @@ GEN(cjal, { vm_reg[0] = map_vm_reg(state, rv_reg_ra); emit_load_imm(state, vm_reg[0], ir->pc + 2); store_back(state); - emit_jmp(state, ir->pc + ir->imm); + emit_jmp(state, ir->pc + ir->imm, rv->csr_satp); emit_load_imm(state, temp_reg, ir->pc + ir->imm); emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); emit_exit(state); @@ -587,7 +946,7 @@ GEN(cand, { }) GEN(cj, { store_back(state); - emit_jmp(state, ir->pc + ir->imm); + emit_jmp(state, ir->pc + ir->imm, rv->csr_satp); emit_load_imm(state, temp_reg, ir->pc + ir->imm); emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); emit_exit(state); @@ -596,17 +955,17 @@ GEN(cbeqz, { vm_reg[0] = ra_load(state, ir->rs1); emit_cmp_imm32(state, vm_reg[0], 0); store_back(state); - uint32_t jump_loc = state->offset; + uint32_t jump_loc_0 = state->offset; emit_jcc_offset(state, 0x84); if (ir->branch_untaken) { - emit_jmp(state, ir->pc + 2); + emit_jmp(state, ir->pc + 2, rv->csr_satp); } emit_load_imm(state, temp_reg, ir->pc + 2); emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); emit_exit(state); - emit_jump_target_offset(state, JUMP_LOC, state->offset); + emit_jump_target_offset(state, JUMP_LOC_0, state->offset); if (ir->branch_taken) { - emit_jmp(state, ir->pc + ir->imm); + emit_jmp(state, ir->pc + ir->imm, rv->csr_satp); } emit_load_imm(state, temp_reg, ir->pc + ir->imm); emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); @@ -616,17 +975,17 @@ GEN(cbnez, { vm_reg[0] = ra_load(state, ir->rs1); emit_cmp_imm32(state, vm_reg[0], 0); store_back(state); - uint32_t jump_loc = state->offset; + uint32_t jump_loc_0 = state->offset; emit_jcc_offset(state, 0x85); if 
(ir->branch_untaken) { - emit_jmp(state, ir->pc + 2); + emit_jmp(state, ir->pc + 2, rv->csr_satp); } emit_load_imm(state, temp_reg, ir->pc + 2); emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); emit_exit(state); - emit_jump_target_offset(state, JUMP_LOC, state->offset); + emit_jump_target_offset(state, JUMP_LOC_0, state->offset); if (ir->branch_taken) { - emit_jmp(state, ir->pc + ir->imm); + emit_jmp(state, ir->pc + ir->imm, rv->csr_satp); } emit_load_imm(state, temp_reg, ir->pc + ir->imm); emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); @@ -648,7 +1007,7 @@ GEN(cjr, { vm_reg[0] = ra_load(state, ir->rs1); emit_mov(state, vm_reg[0], temp_reg); store_back(state); - parse_branch_history_table(state, ir); + parse_branch_history_table(state, rv, ir); emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); emit_exit(state); }) @@ -674,7 +1033,7 @@ GEN(cjalr, { vm_reg[1] = map_vm_reg(state, rv_reg_ra); emit_load_imm(state, vm_reg[1], ir->pc + 2); store_back(state); - parse_branch_history_table(state, ir); + parse_branch_history_table(state, rv, ir); emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); emit_exit(state); }) @@ -693,13 +1052,19 @@ GEN(cswsp, { vm_reg[1] = ra_load(state, ir->rs2); emit_store(state, S32, vm_reg[1], temp_reg, 0); }) +#endif +#if RV32_HAS(EXT_C) && RV32_HAS(EXT_F) GEN(cflwsp, { assert(NULL); }) GEN(cfswsp, { assert(NULL); }) GEN(cflw, { assert(NULL); }) GEN(cfsw, { assert(NULL); }) +#endif +#if RV32_HAS(Zba) GEN(sh1add, { assert(NULL); }) GEN(sh2add, { assert(NULL); }) GEN(sh3add, { assert(NULL); }) +#endif +#if RV32_HAS(Zbb) GEN(andn, { assert(NULL); }) GEN(orn, { assert(NULL); }) GEN(xnor, { assert(NULL); }) @@ -718,9 +1083,13 @@ GEN(ror, { assert(NULL); }) GEN(rori, { assert(NULL); }) GEN(orcb, { assert(NULL); }) GEN(rev8, { assert(NULL); }) +#endif +#if RV32_HAS(Zbc) GEN(clmul, { assert(NULL); }) GEN(clmulh, { assert(NULL); }) GEN(clmulr, { 
assert(NULL); }) +#endif +#if RV32_HAS(Zbs) GEN(bclr, { assert(NULL); }) GEN(bclri, { assert(NULL); }) GEN(bext, { assert(NULL); }) @@ -729,3 +1098,4 @@ GEN(binv, { assert(NULL); }) GEN(binvi, { assert(NULL); }) GEN(bset, { assert(NULL); }) GEN(bseti, { assert(NULL); }) +#endif diff --git a/src/rv32_template.c b/src/rv32_template.c index b53e6dbd..a2354daf 100644 --- a/src/rv32_template.c +++ b/src/rv32_template.c @@ -168,11 +168,18 @@ RVOP( struct rv_insn *taken = ir->branch_taken; if (taken) { #if RV32_HAS(JIT) - cache_get(rv->block_cache, PC, true); - if (!set_add(&pc_set, PC)) - has_loops = true; - if (cache_hot(rv->block_cache, PC)) - goto end_op; + IIF(RV32_HAS(SYSTEM)(if (!rv->is_trapped && !reloc_enable_mmu), )) + { + IIF(RV32_HAS(SYSTEM)) + (block_t *next =, ) cache_get(rv->block_cache, PC, true); + IIF(RV32_HAS(SYSTEM))(if (next->satp == rv->csr_satp), ) + { + if (!set_add(&pc_set, PC)) + has_loops = true; + if (cache_hot(rv->block_cache, PC)) + goto end_op; + } + } #endif #if RV32_HAS(SYSTEM) if (!rv->is_trapped) @@ -249,36 +256,42 @@ RVOP( } \ } #else -#define LOOKUP_OR_UPDATE_BRANCH_HISTORY_TABLE() \ - IIF(RV32_HAS(GDBSTUB)(if (!rv->debug_mode), )) \ - { \ - block_t *block = cache_get(rv->block_cache, PC, true); \ - if (block) { \ - for (int i = 0; i < HISTORY_SIZE; i++) { \ - if (ir->branch_table->PC[i] == PC) { \ - ir->branch_table->times[i]++; \ - if (cache_hot(rv->block_cache, PC)) \ - goto end_op; \ - } \ - } \ - /* update branch history table */ \ - int min_idx = 0; \ - for (int i = 0; i < HISTORY_SIZE; i++) { \ - if (!ir->branch_table->times[i]) { \ - min_idx = i; \ - break; \ - } else if (ir->branch_table->times[min_idx] > \ - ir->branch_table->times[i]) { \ - min_idx = i; \ - } \ - } \ - ir->branch_table->times[min_idx] = ir->branch_table->PC[min_idx] = \ - PC; \ - if (cache_hot(rv->block_cache, PC)) \ - goto end_op; \ - MUST_TAIL return block->ir_head->impl(rv, block->ir_head, cycle, \ - PC); \ - } \ +#define 
LOOKUP_OR_UPDATE_BRANCH_HISTORY_TABLE() \ + IIF(RV32_HAS(SYSTEM))(if (!rv->is_trapped && !reloc_enable_mmu), ) \ + { \ + block_t *block = cache_get(rv->block_cache, PC, true); \ + if (block) { \ + for (int i = 0; i < HISTORY_SIZE; i++) { \ + if (ir->branch_table->PC[i] == PC) { \ + IIF(RV32_HAS(SYSTEM)) \ + (if (ir->branch_table->satp[i] == rv->csr_satp), ) \ + { \ + ir->branch_table->times[i]++; \ + if (cache_hot(rv->block_cache, PC)) \ + goto end_op; \ + } \ + } \ + } \ + /* update branch history table */ \ + int min_idx = 0; \ + for (int i = 0; i < HISTORY_SIZE; i++) { \ + if (!ir->branch_table->times[i]) { \ + min_idx = i; \ + break; \ + } else if (ir->branch_table->times[min_idx] > \ + ir->branch_table->times[i]) { \ + min_idx = i; \ + } \ + } \ + ir->branch_table->times[min_idx] = 1; \ + ir->branch_table->PC[min_idx] = PC; \ + IIF(RV32_HAS(SYSTEM)) \ + (ir->branch_table->satp[min_idx] = rv->csr_satp, ); \ + if (cache_hot(rv->block_cache, PC)) \ + goto end_op; \ + MUST_TAIL return block->ir_head->impl(rv, block->ir_head, cycle, \ + PC); \ + } \ } #endif @@ -368,11 +381,14 @@ RVOP( IIF(RV32_HAS(JIT)) \ ( \ { \ - cache_get(rv->block_cache, PC + 4, true); \ - if (!set_add(&pc_set, PC + 4)) \ - has_loops = true; \ - if (cache_hot(rv->block_cache, PC + 4)) \ - goto nextop; \ + block_t *next = cache_get(rv->block_cache, PC + 4, true); \ + if (next IIF(RV32_HAS(SYSTEM))( \ + &&next->satp == rv->csr_satp, )) { \ + if (!set_add(&pc_set, PC + 4)) \ + has_loops = true; \ + if (cache_hot(rv->block_cache, PC + 4)) \ + goto nextop; \ + } \ }, ); \ PC += 4; \ IIF(RV32_HAS(SYSTEM)) \ @@ -402,11 +418,14 @@ RVOP( IIF(RV32_HAS(JIT)) \ ( \ { \ - cache_get(rv->block_cache, PC, true); \ - if (!set_add(&pc_set, PC)) \ - has_loops = true; \ - if (cache_hot(rv->block_cache, PC)) \ - goto end_op; \ + block_t *next = cache_get(rv->block_cache, PC, true); \ + if (next IIF(RV32_HAS(SYSTEM))( \ + &&next->satp == rv->csr_satp, )) { \ + if (!set_add(&pc_set, PC)) \ + has_loops = true; \ + if 
(cache_hot(rv->block_cache, PC)) \ + goto end_op; \ + } \ }, ); \ IIF(RV32_HAS(SYSTEM)) \ ( \ @@ -2088,11 +2107,15 @@ RVOP( struct rv_insn *taken = ir->branch_taken; if (taken) { #if RV32_HAS(JIT) - cache_get(rv->block_cache, PC, true); - if (!set_add(&pc_set, PC)) - has_loops = true; - if (cache_hot(rv->block_cache, PC)) - goto end_op; + IIF(RV32_HAS(SYSTEM)) + (block_t *next =, ) cache_get(rv->block_cache, PC, true); + IIF(RV32_HAS(SYSTEM))(if (next->satp == rv->csr_satp), ) + { + if (!set_add(&pc_set, PC)) + has_loops = true; + if (cache_hot(rv->block_cache, PC)) + goto end_op; + } #endif #if RV32_HAS(SYSTEM) @@ -2255,11 +2278,15 @@ RVOP( struct rv_insn *taken = ir->branch_taken; if (taken) { #if RV32_HAS(JIT) - cache_get(rv->block_cache, PC, true); - if (!set_add(&pc_set, PC)) - has_loops = true; - if (cache_hot(rv->block_cache, PC)) - goto end_op; + IIF(RV32_HAS(SYSTEM)) + (block_t *next =, ) cache_get(rv->block_cache, PC, true); + IIF(RV32_HAS(SYSTEM))(if (next->satp == rv->csr_satp), ) + { + if (!set_add(&pc_set, PC)) + has_loops = true; + if (cache_hot(rv->block_cache, PC)) + goto end_op; + } #endif #if RV32_HAS(SYSTEM) if (!rv->is_trapped) @@ -2293,11 +2320,15 @@ RVOP( if (!untaken) goto nextop; #if RV32_HAS(JIT) - cache_get(rv->block_cache, PC + 2, true); - if (!set_add(&pc_set, PC + 2)) - has_loops = true; - if (cache_hot(rv->block_cache, PC + 2)) - goto nextop; + IIF(RV32_HAS(SYSTEM)) + (block_t *next =, ) cache_get(rv->block_cache, PC + 2, true); + IIF(RV32_HAS(SYSTEM))(if (next->satp == rv->csr_satp), ) + { + if (!set_add(&pc_set, PC + 2)) + has_loops = true; + if (cache_hot(rv->block_cache, PC + 2)) + goto nextop; + } #endif PC += 2; #if RV32_HAS(SYSTEM) @@ -2315,11 +2346,15 @@ RVOP( struct rv_insn *taken = ir->branch_taken; if (taken) { #if RV32_HAS(JIT) - cache_get(rv->block_cache, PC, true); - if (!set_add(&pc_set, PC)) - has_loops = true; - if (cache_hot(rv->block_cache, PC)) - goto end_op; + IIF(RV32_HAS(SYSTEM)) + (block_t *next =, ) 
cache_get(rv->block_cache, PC, true); + IIF(RV32_HAS(SYSTEM))(if (next->satp == rv->csr_satp), ) + { + if (!set_add(&pc_set, PC)) + has_loops = true; + if (cache_hot(rv->block_cache, PC)) + goto end_op; + } #endif #if RV32_HAS(SYSTEM) if (!rv->is_trapped) @@ -2362,11 +2397,15 @@ RVOP( if (!untaken) goto nextop; #if RV32_HAS(JIT) - cache_get(rv->block_cache, PC + 2, true); - if (!set_add(&pc_set, PC + 2)) - has_loops = true; - if (cache_hot(rv->block_cache, PC + 2)) - goto nextop; + IIF(RV32_HAS(SYSTEM)) + (block_t *next =, ) cache_get(rv->block_cache, PC + 2, true); + IIF(RV32_HAS(SYSTEM))(if (next->satp == rv->csr_satp), ) + { + if (!set_add(&pc_set, PC + 2)) + has_loops = true; + if (cache_hot(rv->block_cache, PC + 2)) + goto nextop; + } #endif PC += 2; #if RV32_HAS(SYSTEM) @@ -2384,11 +2423,15 @@ RVOP( struct rv_insn *taken = ir->branch_taken; if (taken) { #if RV32_HAS(JIT) - cache_get(rv->block_cache, PC, true); - if (!set_add(&pc_set, PC)) - has_loops = true; - if (cache_hot(rv->block_cache, PC)) - goto end_op; + IIF(RV32_HAS(SYSTEM)) + (block_t *next =, ) cache_get(rv->block_cache, PC, true); + IIF(RV32_HAS(SYSTEM))(if (next->satp == rv->csr_satp), ) + { + if (!set_add(&pc_set, PC)) + has_loops = true; + if (cache_hot(rv->block_cache, PC)) + goto end_op; + } #endif #if RV32_HAS(SYSTEM) if (!rv->is_trapped) diff --git a/src/utils.c b/src/utils.c index 3199235d..e5a42645 100644 --- a/src/utils.c +++ b/src/utils.c @@ -3,6 +3,7 @@ * "LICENSE" for information on usage and redistribution of this file. 
*/ +#include #include #include #include @@ -186,15 +187,18 @@ void set_reset(set_t *set) * @set: a pointer points to target set * @key: the key of the inserted entry */ -bool set_add(set_t *set, uint32_t key) +bool set_add(set_t *set, rv_hash_key_t key) { - const uint32_t index = set_hash(key); + const rv_hash_key_t index = set_hash(key); + uint8_t count = 0; - while (set->table[index][count]) { - if (set->table[index][count++] == key) + for (; set->table[index][count]; count++) { + assert(count < SET_SLOTS_SIZE); + if (set->table[index][count] == key) return false; } + assert(count < SET_SLOTS_SIZE); set->table[index][count] = key; return true; } @@ -204,10 +208,12 @@ bool set_add(set_t *set, uint32_t key) * @set: a pointer points to target set * @key: the key of the inserted entry */ -bool set_has(set_t *set, uint32_t key) +bool set_has(set_t *set, rv_hash_key_t key) { - const uint32_t index = set_hash(key); + const rv_hash_key_t index = set_hash(key); + for (uint8_t count = 0; set->table[index][count]; count++) { + assert(count < SET_SLOTS_SIZE); if (set->table[index][count] == key) return true; } diff --git a/src/utils.h b/src/utils.h index 78f68985..227fde87 100644 --- a/src/utils.h +++ b/src/utils.h @@ -13,16 +13,32 @@ void rv_gettimeofday(struct timeval *tv); /* Retrieve the value used by a clock which is specified by clock_id. */ void rv_clock_gettime(struct timespec *tp); +#if RV32_HAS(JIT) && RV32_HAS(SYSTEM) + +typedef uint64_t rv_hash_key_t; + +#define HASH_FUNC_IMPL(name, size_bits, size) \ + FORCE_INLINE rv_hash_key_t name(rv_hash_key_t val) \ + { \ + /* 0x61c8864680b583eb is 64-bit golden ratio */ \ + return (val * 0x61c8864680b583ebull >> (64 - size_bits)) & \ + ((size) - (1)); \ + } +#else + +typedef uint32_t rv_hash_key_t; + /* This hashing routine is adapted from Linux kernel. 
* See * https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/include/linux/hash.h */ #define HASH_FUNC_IMPL(name, size_bits, size) \ - FORCE_INLINE uint32_t name(uint32_t val) \ + FORCE_INLINE rv_hash_key_t name(rv_hash_key_t val) \ { \ /* 0x61C88647 is 32-bit golden ratio */ \ return (val * 0x61C88647 >> (32 - size_bits)) & ((size) - (1)); \ } +#endif /* sanitize_path returns the shortest path name equivalent to path * by purely lexical processing. It applies the following rules @@ -133,11 +149,23 @@ static inline void list_del_init(struct list_head *node) #define SET_SIZE (1 << SET_SIZE_BITS) #define SET_SLOTS_SIZE 32 +#if RV32_HAS(JIT) && RV32_HAS(SYSTEM) +/* + * Use composed key in JIT. The higher 32 bits stores the value of supervisor + * address translation and protection (SATP) register, and the lower 32 bits + * stores the program counter (PC) as same as userspace simulation. + */ +#define RV_HASH_KEY(block) \ + ((((rv_hash_key_t) block->satp) << 32) | (rv_hash_key_t) block->pc_start) +#else +#define RV_HASH_KEY(block) ((rv_hash_key_t) block->pc_start) +#endif + /* The set consists of SET_SIZE buckets, with each bucket containing * SET_SLOTS_SIZE slots. 
*/ typedef struct { - uint32_t table[SET_SIZE][SET_SLOTS_SIZE]; + rv_hash_key_t table[SET_SIZE][SET_SLOTS_SIZE]; } set_t; /** @@ -151,11 +179,11 @@ void set_reset(set_t *set); * @set: a pointer points to target set * @key: the key of the inserted entry */ -bool set_add(set_t *set, uint32_t key); +bool set_add(set_t *set, rv_hash_key_t key); /** * set_has - check whether the element exist in the set or not * @set: a pointer points to target set * @key: the key of the inserted entry */ -bool set_has(set_t *set, uint32_t key); +bool set_has(set_t *set, rv_hash_key_t key); From 8d461158145ccdac1f08a29f2b971bc4dc51eee2 Mon Sep 17 00:00:00 2001 From: Meng-Hung Chen Date: Fri, 14 Feb 2025 17:27:47 +0800 Subject: [PATCH 4/5] CI: Add JIT system simulation test --- .github/workflows/main.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 02e1c71b..7c36220b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -243,6 +243,14 @@ jobs: .ci/boot-linux.sh make ENABLE_SYSTEM=1 clean if: ${{ always() }} + - name: boot Linux kernel test (JIT) + env: + CC: ${{ steps.install_cc.outputs.cc }} + run: | + make distclean && make INITRD_SIZE=32 ENABLE_SYSTEM=1 ENABLE_JIT=1 ENABLE_T2C=0 ENABLE_MOP_FUSION=0 -j$(nproc) && make ENABLE_SYSTEM=1 artifact -j$(nproc) + .ci/boot-linux.sh + make ENABLE_SYSTEM=1 ENABLE_JIT=1 ENABLE_T2C=0 ENABLE_MOP_FUSION=0 clean + if: ${{ always() }} - name: Architecture test env: CC: ${{ steps.install_cc.outputs.cc }} From 4ef61b8966268a765455d5e30c5e0bdb18eca93e Mon Sep 17 00:00:00 2001 From: Meng-Hung Chen Date: Sat, 8 Feb 2025 14:42:39 +0800 Subject: [PATCH 5/5] Fix missing LLVM libraries --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a0fb6f3a..41c8324b 100644 --- a/Makefile +++ b/Makefile @@ -252,7 +252,7 @@ ifeq ($(call has, JIT), 1) ifeq ("$(CHECK_LLVM_LIBS)", "0") OBJS_EXT += t2c.o CFLAGS += -g $(shell 
$(LLVM_CONFIG) --cflags) - LDFLAGS += $(shell $(LLVM_CONFIG) --libs) + LDFLAGS += $(shell $(LLVM_CONFIG) --libfiles) else $(error No llvm-config-18 installed. Check llvm-config-18 installation in advance, or use "ENABLE_T2C=0" to disable tier-2 LLVM compiler) endif