diff --git a/internal/engine/wazevo/backend/isa/arm64/abi.go b/internal/engine/wazevo/backend/isa/arm64/abi.go index 6615471c6a..4eaa13ce1c 100644 --- a/internal/engine/wazevo/backend/isa/arm64/abi.go +++ b/internal/engine/wazevo/backend/isa/arm64/abi.go @@ -101,13 +101,14 @@ func (m *machine) LowerParams(args []ssa.Value) { bits := arg.Type.Bits() // At this point of compilation, we don't yet know how much space exist below the return address. // So we instruct the address mode to add the `argStackOffset` to the offset at the later phase of compilation. - amode := addressMode{imm: arg.Offset, rn: spVReg, kind: addressModeKindArgStackSpace} + amode := m.amodePool.Allocate() + *amode = addressMode{imm: arg.Offset, rn: spVReg, kind: addressModeKindArgStackSpace} load := m.allocateInstr() switch arg.Type { case ssa.TypeI32, ssa.TypeI64: - load.asULoad(operandNR(reg), amode, bits) + load.asULoad(reg, amode, bits) case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128: - load.asFpuLoad(operandNR(reg), amode, bits) + load.asFpuLoad(reg, amode, bits) default: panic("BUG") } @@ -169,7 +170,8 @@ func (m *machine) LowerReturns(rets []ssa.Value) { // At this point of compilation, we don't yet know how much space exist below the return address. // So we instruct the address mode to add the `retStackOffset` to the offset at the later phase of compilation. - amode := addressMode{imm: r.Offset, rn: spVReg, kind: addressModeKindResultStackSpace} + amode := m.amodePool.Allocate() + *amode = addressMode{imm: r.Offset, rn: spVReg, kind: addressModeKindResultStackSpace} store := m.allocateInstr() store.asStore(operandNR(reg), amode, bits) m.insert(store) @@ -215,9 +217,9 @@ func (m *machine) callerGenFunctionReturnVReg(a *backend.FunctionABI, retIndex i ldr := m.allocateInstr() switch r.Type { case ssa.TypeI32, ssa.TypeI64: - ldr.asULoad(operandNR(reg), amode, r.Type.Bits()) + ldr.asULoad(reg, amode, r.Type.Bits()) case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128: - ldr.asFpuLoad(operandNR(reg), amode, r.Type.Bits()) + ldr.asFpuLoad(reg, amode, r.Type.Bits()) default: panic("BUG") } @@ -225,7 +227,7 @@ func (m *machine) callerGenFunctionReturnVReg(a *backend.FunctionABI, retIndex i } } -func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) (*instruction, addressMode) { +func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) (*instruction, *addressMode) { exct := m.executableContext exct.PendingInstructions = exct.PendingInstructions[:0] mode := m.resolveAddressModeForOffset(offset, dstBits, rn, allowTmpRegUse) @@ -235,15 +237,15 @@ func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset return cur, mode } -func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) addressMode { +func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) *addressMode { if rn.RegType() != regalloc.RegTypeInt { panic("BUG: rn should be a pointer: " + formatVRegSized(rn, 64)) } - var amode addressMode + amode := m.amodePool.Allocate() if offsetFitsInAddressModeKindRegUnsignedImm12(dstBits, offset) { - amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: rn, imm: offset} + *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: rn, imm: offset} } else if offsetFitsInAddressModeKindRegSignedImm9(offset) { - amode = addressMode{kind: 
addressModeKindRegSignedImm9, rn: rn, imm: offset} + *amode = addressMode{kind: addressModeKindRegSignedImm9, rn: rn, imm: offset} } else { var indexReg regalloc.VReg if allowTmpRegUse { @@ -253,7 +255,7 @@ func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn reg indexReg = m.compiler.AllocateVReg(ssa.TypeI64) m.lowerConstantI64(indexReg, offset) } - amode = addressMode{kind: addressModeKindRegReg, rn: rn, rm: indexReg, extOp: extendOpUXTX /* indicates index rm is 64-bit */} + *amode = addressMode{kind: addressModeKindRegReg, rn: rn, rm: indexReg, extOp: extendOpUXTX /* indicates index rm is 64-bit */} } return amode } @@ -315,7 +317,7 @@ func (m *machine) insertAddOrSubStackPointer(rd regalloc.VReg, diff int64, add b } else { ao = aluOpSub } - alu.asALU(ao, operandNR(rd), operandNR(spVReg), imm12Operand, true) + alu.asALU(ao, rd, operandNR(spVReg), imm12Operand, true) m.insert(alu) } else { m.lowerConstantI64(tmpRegVReg, diff) @@ -326,7 +328,7 @@ func (m *machine) insertAddOrSubStackPointer(rd regalloc.VReg, diff int64, add b } else { ao = aluOpSub } - alu.asALU(ao, operandNR(rd), operandNR(spVReg), operandNR(tmpRegVReg), true) + alu.asALU(ao, rd, operandNR(spVReg), operandNR(tmpRegVReg), true) m.insert(alu) } } diff --git a/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go b/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go index 7a9cceb332..f8b5d97ac7 100644 --- a/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go +++ b/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go @@ -59,25 +59,26 @@ func (m *machine) goEntryPreamblePassArg(cur *instruction, paramSlicePtr regallo } else { postIndexImm = 8 } - loadMode := addressMode{kind: addressModeKindPostIndex, rn: paramSlicePtr, imm: postIndexImm} + loadMode := m.amodePool.Allocate() + *loadMode = addressMode{kind: addressModeKindPostIndex, rn: paramSlicePtr, imm: postIndexImm} instr := m.allocateInstr() switch typ { case ssa.TypeI32: - instr.asULoad(loadTargetReg, loadMode, 32) + instr.asULoad(loadTargetReg.reg(), loadMode, 32) case ssa.TypeI64: - instr.asULoad(loadTargetReg, loadMode, 64) + instr.asULoad(loadTargetReg.reg(), loadMode, 64) case ssa.TypeF32: - instr.asFpuLoad(loadTargetReg, loadMode, 32) + instr.asFpuLoad(loadTargetReg.reg(), loadMode, 32) case ssa.TypeF64: - instr.asFpuLoad(loadTargetReg, loadMode, 64) + instr.asFpuLoad(loadTargetReg.reg(), loadMode, 64) case ssa.TypeV128: - instr.asFpuLoad(loadTargetReg, loadMode, 128) + instr.asFpuLoad(loadTargetReg.reg(), loadMode, 128) } cur = linkInstr(cur, instr) if isStackArg { - var storeMode addressMode + var storeMode *addressMode cur, storeMode = m.resolveAddressModeForOffsetAndInsert(cur, argStartOffsetFromSP+arg.Offset, bits, spVReg, true) toStack := m.allocateInstr() toStack.asStore(loadTargetReg, storeMode, bits) @@ -113,21 +114,22 @@ func (m *machine) goEntryPreamblePassResult(cur *instruction, resultSlicePtr reg } if isStackArg { - var loadMode addressMode + var loadMode *addressMode cur, loadMode = m.resolveAddressModeForOffsetAndInsert(cur, resultStartOffsetFromSP+result.Offset, bits, spVReg, true) toReg := m.allocateInstr() switch typ { case ssa.TypeI32, ssa.TypeI64: - toReg.asULoad(storeTargetReg, loadMode, bits) + toReg.asULoad(storeTargetReg.reg(), loadMode, bits) case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128: - toReg.asFpuLoad(storeTargetReg, loadMode, bits) + toReg.asFpuLoad(storeTargetReg.reg(), loadMode, bits) default: panic("TODO?") } cur = linkInstr(cur, toReg) } - mode := addressMode{kind: 
addressModeKindPostIndex, rn: resultSlicePtr, imm: postIndexImm} + mode := m.amodePool.Allocate() + *mode = addressMode{kind: addressModeKindPostIndex, rn: resultSlicePtr, imm: postIndexImm} instr := m.allocateInstr() instr.asStore(storeTargetReg, mode, bits) cur = linkInstr(cur, instr) @@ -214,11 +216,12 @@ func (m *machine) move64(dst, src regalloc.VReg, prev *instruction) *instruction func (m *machine) loadOrStoreAtExecutionContext(d regalloc.VReg, offset wazevoapi.Offset, store bool, prev *instruction) *instruction { instr := m.allocateInstr() - mode := addressMode{kind: addressModeKindRegUnsignedImm12, rn: savedExecutionContextPtr, imm: offset.I64()} + mode := m.amodePool.Allocate() + *mode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: savedExecutionContextPtr, imm: offset.I64()} if store { instr.asStore(operandNR(d), mode, 64) } else { - instr.asULoad(operandNR(d), mode, 64) + instr.asULoad(d, mode, 64) } return linkInstr(prev, instr) } diff --git a/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go b/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go index 466b1f9609..99e6bb482d 100644 --- a/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go +++ b/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go @@ -87,7 +87,8 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig * // Module context is always the second argument. moduleCtrPtr := x1VReg store := m.allocateInstr() - amode := addressMode{kind: addressModeKindRegUnsignedImm12, rn: execCtrPtr, imm: offset} + amode := m.amodePool.Allocate() + *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: execCtrPtr, imm: offset} store.asStore(operandNR(moduleCtrPtr), amode, 64) cur = linkInstr(cur, store) } @@ -120,11 +121,9 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig * } else { sizeInBits = 64 } - store.asStore(operandNR(v), - addressMode{ - kind: addressModeKindPostIndex, - rn: arg0ret0AddrReg, imm: int64(sizeInBits / 8), - }, sizeInBits) + amode := m.amodePool.Allocate() + *amode = addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg, imm: int64(sizeInBits / 8)} + store.asStore(operandNR(v), amode, sizeInBits) cur = linkInstr(cur, store) } @@ -139,7 +138,7 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig * frameSizeReg = xzrVReg sliceSizeReg = xzrVReg } - _amode := addressModePreOrPostIndex(spVReg, -16, true) + _amode := addressModePreOrPostIndex(m, spVReg, -16, true) storeP := m.allocateInstr() storeP.asStorePair64(frameSizeReg, sliceSizeReg, _amode) cur = linkInstr(cur, storeP) @@ -165,8 +164,8 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig * cur = m.addsAddOrSubStackPointer(cur, spVReg, frameInfoSize+goCallStackSize, true) ldr := m.allocateInstr() // And load the return address. - ldr.asULoad(operandNR(lrVReg), - addressModePreOrPostIndex(spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64) + amode := addressModePreOrPostIndex(m, spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */) + ldr.asULoad(lrVReg, amode, 64) cur = linkInstr(cur, ldr) originalRet0Reg := x17VReg // Caller save, so we can use it for whatever we want. 
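Reviewer note on the `m.amodePool.Allocate()` / `*amode = addressMode{...}` pattern that recurs throughout this change: address modes used to be embedded by value in each instruction and are now pointers handed out by a per-machine pool that is recycled between compilations. The sketch below shows a minimal pool of that shape; it is illustrative only (the real `wazevoapi` pool is not part of this diff), and `chunkSize` and the field names are made up.

```go
package main

import "fmt"

// pool hands out *T values backed by reusable chunks. Reset rewinds the
// allocation index but keeps the backing memory, so the next compilation
// overwrites the same slots instead of heap-allocating one addressMode
// per load/store instruction.
type pool[T any] struct {
	chunks [][]T
	idx    int // next free slot in the last chunk
}

const chunkSize = 128 // illustrative

func (p *pool[T]) Allocate() *T {
	if len(p.chunks) == 0 || p.idx == chunkSize {
		p.chunks = append(p.chunks, make([]T, chunkSize))
		p.idx = 0
	}
	v := &p.chunks[len(p.chunks)-1][p.idx]
	p.idx++
	return v
}

func (p *pool[T]) Reset() {
	if len(p.chunks) > 1 {
		p.chunks = p.chunks[:1]
	}
	p.idx = 0
}

type addressMode struct {
	kind int
	imm  int64
}

func main() {
	var amodePool pool[addressMode]
	amode := amodePool.Allocate()
	*amode = addressMode{kind: 1, imm: 16} // full overwrite, as in the diff
	fmt.Printf("%+v\n", *amode)
	amodePool.Reset() // once per function compilation
}
```

Assigning the whole struct after Allocate (rather than writing `&addressMode{...}`) avoids a per-instruction heap allocation and gives the pool ownership of every address mode's lifetime, which is also what makes storing a raw pointer inside the instruction (later in this diff) safe.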
@@ -183,23 +182,24 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig * r := &abi.Rets[i] if r.Kind == backend.ABIArgKindReg { loadIntoReg := m.allocateInstr() - mode := addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg} + mode := m.amodePool.Allocate() + *mode = addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg} switch r.Type { case ssa.TypeI32: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoReg.asULoad(operandNR(r.Reg), mode, 32) + loadIntoReg.asULoad(r.Reg, mode, 32) case ssa.TypeI64: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoReg.asULoad(operandNR(r.Reg), mode, 64) + loadIntoReg.asULoad(r.Reg, mode, 64) case ssa.TypeF32: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 32) + loadIntoReg.asFpuLoad(r.Reg, mode, 32) case ssa.TypeF64: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 64) + loadIntoReg.asFpuLoad(r.Reg, mode, 64) case ssa.TypeV128: mode.imm = 16 - loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 128) + loadIntoReg.asFpuLoad(r.Reg, mode, 128) default: panic("TODO") } @@ -208,28 +208,29 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig * // First we need to load the value to a temporary just like ^^. intTmp, floatTmp := x11VReg, v11VReg loadIntoTmpReg := m.allocateInstr() - mode := addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg} + mode := m.amodePool.Allocate() + *mode = addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg} var resultReg regalloc.VReg switch r.Type { case ssa.TypeI32: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoTmpReg.asULoad(operandNR(intTmp), mode, 32) + loadIntoTmpReg.asULoad(intTmp, mode, 32) resultReg = intTmp case ssa.TypeI64: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoTmpReg.asULoad(operandNR(intTmp), mode, 64) + loadIntoTmpReg.asULoad(intTmp, mode, 64) resultReg = intTmp case ssa.TypeF32: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 32) + loadIntoTmpReg.asFpuLoad(floatTmp, mode, 32) resultReg = floatTmp case ssa.TypeF64: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 64) + loadIntoTmpReg.asFpuLoad(floatTmp, mode, 64) resultReg = floatTmp case ssa.TypeV128: mode.imm = 16 - loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 128) + loadIntoTmpReg.asFpuLoad(floatTmp, mode, 128) resultReg = floatTmp default: panic("TODO") @@ -258,12 +259,13 @@ func (m *machine) saveRegistersInExecutionContext(cur *instruction, regs []regal case regalloc.RegTypeFloat: sizeInBits = 128 } - store.asStore(operandNR(v), - addressMode{ - kind: addressModeKindRegUnsignedImm12, - // Execution context is always the first argument. - rn: x0VReg, imm: offset, - }, sizeInBits) + mode := m.amodePool.Allocate() + *mode = addressMode{ + kind: addressModeKindRegUnsignedImm12, + // Execution context is always the first argument. 
+ rn: x0VReg, imm: offset, + } + store.asStore(operandNR(v), mode, sizeInBits) store.prev = cur cur.next = store cur = store @@ -276,7 +278,7 @@ func (m *machine) restoreRegistersInExecutionContext(cur *instruction, regs []re offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64() for _, v := range regs { load := m.allocateInstr() - var as func(dst operand, amode addressMode, sizeInBits byte) + var as func(dst regalloc.VReg, amode *addressMode, sizeInBits byte) var sizeInBits byte switch v.RegType() { case regalloc.RegTypeInt: @@ -286,12 +288,13 @@ func (m *machine) restoreRegistersInExecutionContext(cur *instruction, regs []re as = load.asFpuLoad sizeInBits = 128 } - as(operandNR(v), - addressMode{ - kind: addressModeKindRegUnsignedImm12, - // Execution context is always the first argument. - rn: x0VReg, imm: offset, - }, sizeInBits) + mode := m.amodePool.Allocate() + *mode = addressMode{ + kind: addressModeKindRegUnsignedImm12, + // Execution context is always the first argument. + rn: x0VReg, imm: offset, + } + as(v, mode, sizeInBits) cur = linkInstr(cur, load) offset += 16 // Imm12 must be aligned 16 for vector regs, so we unconditionally load regs at the offset of multiple of 16. } @@ -324,11 +327,9 @@ func (m *machine) setExitCode(cur *instruction, execCtr regalloc.VReg, exitCode // Set the exit status on the execution context. setExistStatus := m.allocateInstr() - setExistStatus.asStore(operandNR(constReg), - addressMode{ - kind: addressModeKindRegUnsignedImm12, - rn: execCtr, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64(), - }, 32) + mode := m.amodePool.Allocate() + *mode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: execCtr, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64()} + setExistStatus.asStore(operandNR(constReg), mode, 32) cur = linkInstr(cur, setExistStatus) return cur } @@ -340,12 +341,13 @@ func (m *machine) storeReturnAddressAndExit(cur *instruction) *instruction { cur = linkInstr(cur, adr) storeReturnAddr := m.allocateInstr() - storeReturnAddr.asStore(operandNR(tmpRegVReg), - addressMode{ - kind: addressModeKindRegUnsignedImm12, - // Execution context is always the first argument. - rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(), - }, 64) + mode := m.amodePool.Allocate() + *mode = addressMode{ + kind: addressModeKindRegUnsignedImm12, + // Execution context is always the first argument. + rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(), + } + storeReturnAddr.asStore(operandNR(tmpRegVReg), mode, 64) cur = linkInstr(cur, storeReturnAddr) // Exit the execution. 
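The `var as func(dst regalloc.VReg, amode *addressMode, sizeInBits byte)` rewrite in restoreRegistersInExecutionContext leans on Go method values: `load.asULoad` and `load.asFpuLoad` now share a signature, so either can be captured with its receiver already bound and invoked later. A tiny self-contained illustration follows; the types and names here are stand-ins, not the backend's real ones.

```go
package main

import "fmt"

type instr struct{ text string }

// Both setters have the same signature, so either can be stored in the
// same func-typed variable; the receiver is captured at assignment time.
func (i *instr) asULoad(dst string, sizeInBits byte) {
	i.text = fmt.Sprintf("ldr(u) %s, #%d-bit", dst, sizeInBits)
}

func (i *instr) asFpuLoad(dst string, sizeInBits byte) {
	i.text = fmt.Sprintf("ldr(fpu) %s, #%d-bit", dst, sizeInBits)
}

func main() {
	load := &instr{}
	var as func(dst string, sizeInBits byte)
	if isFloat := true; isFloat {
		as = load.asFpuLoad // method value: `load` is bound here
	} else {
		as = load.asULoad
	}
	as("v11", 128) // equivalent to load.asFpuLoad("v11", 128)
	fmt.Println(load.text)
}
```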
@@ -364,11 +366,12 @@ func (m *machine) saveCurrentStackPointer(cur *instruction, execCtr regalloc.VRe cur = linkInstr(cur, movSp) strSp := m.allocateInstr() - strSp.asStore(operandNR(tmpRegVReg), - addressMode{ - kind: addressModeKindRegUnsignedImm12, - rn: execCtr, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(), - }, 64) + mode := m.amodePool.Allocate() + *mode = addressMode{ + kind: addressModeKindRegUnsignedImm12, + rn: execCtr, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(), + } + strSp.asStore(operandNR(tmpRegVReg), mode, 64) cur = linkInstr(cur, strSp) return cur } @@ -376,27 +379,28 @@ func (m *machine) saveCurrentStackPointer(cur *instruction, execCtr regalloc.VRe func (m *machine) goFunctionCallLoadStackArg(cur *instruction, originalArg0Reg regalloc.VReg, arg *backend.ABIArg, intVReg, floatVReg regalloc.VReg) (*instruction, regalloc.VReg) { load := m.allocateInstr() var result regalloc.VReg - mode := addressMode{kind: addressModeKindPostIndex, rn: originalArg0Reg} + mode := m.amodePool.Allocate() + *mode = addressMode{kind: addressModeKindPostIndex, rn: originalArg0Reg} switch arg.Type { case ssa.TypeI32: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - load.asULoad(operandNR(intVReg), mode, 32) + load.asULoad(intVReg, mode, 32) result = intVReg case ssa.TypeI64: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - load.asULoad(operandNR(intVReg), mode, 64) + load.asULoad(intVReg, mode, 64) result = intVReg case ssa.TypeF32: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - load.asFpuLoad(operandNR(floatVReg), mode, 32) + load.asFpuLoad(floatVReg, mode, 32) result = floatVReg case ssa.TypeF64: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - load.asFpuLoad(operandNR(floatVReg), mode, 64) + load.asFpuLoad(floatVReg, mode, 64) result = floatVReg case ssa.TypeV128: mode.imm = 16 - load.asFpuLoad(operandNR(floatVReg), mode, 128) + load.asFpuLoad(floatVReg, mode, 128) result = floatVReg default: panic("TODO") @@ -408,7 +412,8 @@ func (m *machine) goFunctionCallLoadStackArg(cur *instruction, originalArg0Reg r func (m *machine) goFunctionCallStoreStackResult(cur *instruction, originalRet0Reg regalloc.VReg, result *backend.ABIArg, resultVReg regalloc.VReg) *instruction { store := m.allocateInstr() - mode := addressMode{kind: addressModeKindPostIndex, rn: originalRet0Reg} + mode := m.amodePool.Allocate() + *mode = addressMode{kind: addressModeKindPostIndex, rn: originalRet0Reg} var sizeInBits byte switch result.Type { case ssa.TypeI32, ssa.TypeF32: diff --git a/internal/engine/wazevo/backend/isa/arm64/instr.go b/internal/engine/wazevo/backend/isa/arm64/instr.go index 8aabc5997b..7121cb5382 100644 --- a/internal/engine/wazevo/backend/isa/arm64/instr.go +++ b/internal/engine/wazevo/backend/isa/arm64/instr.go @@ -3,10 +3,12 @@ package arm64 import ( "fmt" "math" + "unsafe" "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" ) type ( @@ -22,9 +24,9 @@ type ( // TODO: optimize the layout later once the impl settles. 
instruction struct { prev, next *instruction - u1, u2, u3 uint64 - rd, rm, rn, ra operand - amode addressMode + u1, u2 uint64 + rd regalloc.VReg + rm, rn operand kind instructionKind addedBeforeRegAlloc bool } @@ -174,7 +176,7 @@ func (i *instruction) Defs(regs *[]regalloc.VReg) []regalloc.VReg { switch defKinds[i.kind] { case defKindNone: case defKindRD: - *regs = append(*regs, i.rd.nr()) + *regs = append(*regs, i.rd) case defKindCall: _, _, retIntRealRegs, retFloatRealRegs, _ := backend.ABIInfoFromUint64(i.u2) for i := byte(0); i < retIntRealRegs; i++ { @@ -194,7 +196,7 @@ func (i *instruction) AssignDef(reg regalloc.VReg) { switch defKinds[i.kind] { case defKindNone: case defKindRD: - i.rd = i.rd.assignReg(reg) + i.rd = reg case defKindCall: panic("BUG: call instructions shouldn't be assigned") default: @@ -329,7 +331,7 @@ func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg { if rm := i.rm.reg(); rm.Valid() { *regs = append(*regs, rm) } - if ra := i.ra.reg(); ra.Valid() { + if ra := regalloc.VReg(i.u2); ra.Valid() { *regs = append(*regs, ra) } case useKindRNRN1RM: @@ -341,18 +343,20 @@ func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg { *regs = append(*regs, rm) } case useKindAMode: - if amodeRN := i.amode.rn; amodeRN.Valid() { + amode := i.getAmode() + if amodeRN := amode.rn; amodeRN.Valid() { *regs = append(*regs, amodeRN) } - if amodeRM := i.amode.rm; amodeRM.Valid() { + if amodeRM := amode.rm; amodeRM.Valid() { *regs = append(*regs, amodeRM) } case useKindRNAMode: *regs = append(*regs, i.rn.reg()) - if amodeRN := i.amode.rn; amodeRN.Valid() { + amode := i.getAmode() + if amodeRN := amode.rn; amodeRN.Valid() { *regs = append(*regs, amodeRN) } - if amodeRM := i.amode.rm; amodeRM.Valid() { + if amodeRM := amode.rm; amodeRM.Valid() { *regs = append(*regs, amodeRM) } case useKindCond: @@ -374,7 +378,7 @@ func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg { case useKindRDRewrite: *regs = append(*regs, i.rn.reg()) *regs = append(*regs, i.rm.reg()) - *regs = append(*regs, i.rd.reg()) + *regs = append(*regs, i.rd) default: panic(fmt.Sprintf("useKind for %v not defined", i)) } @@ -408,8 +412,8 @@ func (i *instruction) AssignUse(index int, reg regalloc.VReg) { i.rm = i.rm.assignReg(reg) } } else { - if rd := i.rd.reg(); rd.Valid() { - i.rd = i.rd.assignReg(reg) + if rd := i.rd; rd.Valid() { + i.rd = reg } } case useKindRNRN1RM: @@ -435,32 +439,36 @@ func (i *instruction) AssignUse(index int, reg regalloc.VReg) { i.rm = i.rm.assignReg(reg) } } else { - if ra := i.ra.reg(); ra.Valid() { - i.ra = i.ra.assignReg(reg) + if ra := regalloc.VReg(i.u2); ra.Valid() { + i.u2 = uint64(reg) } } case useKindAMode: if index == 0 { - if amodeRN := i.amode.rn; amodeRN.Valid() { - i.amode.rn = reg + amode := i.getAmode() + if amodeRN := amode.rn; amodeRN.Valid() { + amode.rn = reg } } else { - if amodeRM := i.amode.rm; amodeRM.Valid() { - i.amode.rm = reg + amode := i.getAmode() + if amodeRM := amode.rm; amodeRM.Valid() { + amode.rm = reg } } case useKindRNAMode: if index == 0 { i.rn = i.rn.assignReg(reg) } else if index == 1 { - if amodeRN := i.amode.rn; amodeRN.Valid() { - i.amode.rn = reg + amode := i.getAmode() + if amodeRN := amode.rn; amodeRN.Valid() { + amode.rn = reg } else { panic("BUG") } } else { - if amodeRM := i.amode.rm; amodeRM.Valid() { - i.amode.rm = reg + amode := i.getAmode() + if amodeRM := amode.rm; amodeRM.Valid() { + amode.rm = reg } else { panic("BUG") } @@ -503,35 +511,35 @@ func (i *instruction) callFuncRef() ssa.FuncRef { } // shift must be 
divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false) -func (i *instruction) asMOVZ(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) { +func (i *instruction) asMOVZ(dst regalloc.VReg, imm uint64, shift uint32, dst64bit bool) { i.kind = movZ - i.rd = operandNR(dst) + i.rd = dst i.u1 = imm - i.u2 = shift + i.u2 = uint64(shift) if dst64bit { - i.u3 = 1 + i.u2 |= 1 << 32 } } // shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false) -func (i *instruction) asMOVK(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) { +func (i *instruction) asMOVK(dst regalloc.VReg, imm uint64, shift uint32, dst64bit bool) { i.kind = movK - i.rd = operandNR(dst) + i.rd = dst i.u1 = imm - i.u2 = shift + i.u2 = uint64(shift) if dst64bit { - i.u3 = 1 + i.u2 |= 1 << 32 } } // shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false) -func (i *instruction) asMOVN(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) { +func (i *instruction) asMOVN(dst regalloc.VReg, imm uint64, shift uint32, dst64bit bool) { i.kind = movN - i.rd = operandNR(dst) + i.rd = dst i.u1 = imm - i.u2 = shift + i.u2 = uint64(shift) if dst64bit { - i.u3 = 1 + i.u2 |= 1 << 32 } } @@ -553,21 +561,21 @@ func (i *instruction) asRet() { i.kind = ret } -func (i *instruction) asStorePair64(src1, src2 regalloc.VReg, amode addressMode) { +func (i *instruction) asStorePair64(src1, src2 regalloc.VReg, amode *addressMode) { i.kind = storeP64 i.rn = operandNR(src1) i.rm = operandNR(src2) - i.amode = amode + i.setAmode(amode) } -func (i *instruction) asLoadPair64(src1, src2 regalloc.VReg, amode addressMode) { +func (i *instruction) asLoadPair64(src1, src2 regalloc.VReg, amode *addressMode) { i.kind = loadP64 i.rn = operandNR(src1) i.rm = operandNR(src2) - i.amode = amode + i.setAmode(amode) } -func (i *instruction) asStore(src operand, amode addressMode, sizeInBits byte) { +func (i *instruction) asStore(src operand, amode *addressMode, sizeInBits byte) { switch sizeInBits { case 8: i.kind = store8 @@ -589,10 +597,10 @@ func (i *instruction) asStore(src operand, amode addressMode, sizeInBits byte) { i.kind = fpuStore128 } i.rn = src - i.amode = amode + i.setAmode(amode) } -func (i *instruction) asSLoad(dst operand, amode addressMode, sizeInBits byte) { +func (i *instruction) asSLoad(dst regalloc.VReg, amode *addressMode, sizeInBits byte) { switch sizeInBits { case 8: i.kind = sLoad8 @@ -604,10 +612,10 @@ func (i *instruction) asSLoad(dst operand, amode addressMode, sizeInBits byte) { panic("BUG") } i.rd = dst - i.amode = amode + i.setAmode(amode) } -func (i *instruction) asULoad(dst operand, amode addressMode, sizeInBits byte) { +func (i *instruction) asULoad(dst regalloc.VReg, amode *addressMode, sizeInBits byte) { switch sizeInBits { case 8: i.kind = uLoad8 @@ -619,10 +627,10 @@ func (i *instruction) asULoad(dst operand, amode addressMode, sizeInBits byte) { i.kind = uLoad64 } i.rd = dst - i.amode = amode + i.setAmode(amode) } -func (i *instruction) asFpuLoad(dst operand, amode addressMode, sizeInBits byte) { +func (i *instruction) asFpuLoad(dst regalloc.VReg, amode *addressMode, sizeInBits byte) { switch sizeInBits { case 32: i.kind = fpuLoad32 @@ -632,10 +640,18 @@ func (i *instruction) asFpuLoad(dst operand, amode addressMode, sizeInBits byte) i.kind = fpuLoad128 } i.rd = dst - i.amode = amode + i.setAmode(amode) } -func (i *instruction) asVecLoad1R(rd, rn operand, arr vecArrangement) { +func (i 
*instruction) getAmode() *addressMode { + return wazevoapi.PtrFromUintptr[addressMode](uintptr(i.u1)) +} + +func (i *instruction) setAmode(a *addressMode) { + i.u1 = uint64(uintptr(unsafe.Pointer(a))) +} + +func (i *instruction) asVecLoad1R(rd regalloc.VReg, rn operand, arr vecArrangement) { // NOTE: currently only has support for no-offset loads, though it is suspicious that // we would need to support offset load (that is only available for post-index). i.kind = vecLoad1R @@ -646,32 +662,32 @@ func (i *instruction) asVecLoad1R(rd, rn operand, arr vecArrangement) { func (i *instruction) asCSet(rd regalloc.VReg, mask bool, c condFlag) { i.kind = cSet - i.rd = operandNR(rd) + i.rd = rd i.u1 = uint64(c) if mask { i.u2 = 1 } } -func (i *instruction) asCSel(rd, rn, rm operand, c condFlag, _64bit bool) { +func (i *instruction) asCSel(rd regalloc.VReg, rn, rm operand, c condFlag, _64bit bool) { i.kind = cSel i.rd = rd i.rn = rn i.rm = rm i.u1 = uint64(c) if _64bit { - i.u3 = 1 + i.u2 = 1 } } -func (i *instruction) asFpuCSel(rd, rn, rm operand, c condFlag, _64bit bool) { +func (i *instruction) asFpuCSel(rd regalloc.VReg, rn, rm operand, c condFlag, _64bit bool) { i.kind = fpuCSel i.rd = rd i.rn = rn i.rm = rm i.u1 = uint64(c) if _64bit { - i.u3 = 1 + i.u2 = 1 } } @@ -691,7 +707,7 @@ func (i *instruction) asBrTableSequence(indexReg regalloc.VReg, targetIndex, tar } func (i *instruction) brTableSequenceOffsetsResolved() { - i.u3 = 1 // indicate that the offsets are resolved, for debugging. + i.rm.data = 1 // indicate that the offsets are resolved, for debugging. } func (i *instruction) brLabel() label { @@ -701,7 +717,7 @@ func (i *instruction) brLabel() label { // brOffsetResolved is called when the target label is resolved. func (i *instruction) brOffsetResolve(offset int64) { i.u2 = uint64(offset) - i.u3 = 1 // indicate that the offset is resolved, for debugging. + i.rm.data = 1 // indicate that the offset is resolved, for debugging. } func (i *instruction) brOffset() int64 { @@ -714,7 +730,7 @@ func (i *instruction) asCondBr(c cond, target label, is64bit bool) { i.u1 = c.asUint64() i.u2 = uint64(target) if is64bit { - i.u3 = 1 + i.u2 |= 1 << 32 } } @@ -728,17 +744,17 @@ func (i *instruction) condBrLabel() label { // condBrOffsetResolve is called when the target label is resolved. func (i *instruction) condBrOffsetResolve(offset int64) { - i.rd.data = uint64(offset) - i.rd.data2 = 1 // indicate that the offset is resolved, for debugging. + i.rn.data = uint64(offset) + i.rn.data2 = 1 // indicate that the offset is resolved, for debugging. } // condBrOffsetResolved returns true if condBrOffsetResolve is already called. 
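The new getAmode/setAmode pair stores the pooled *addressMode inside the instruction's u1 field as an integer and converts it back with wazevoapi.PtrFromUintptr. The sketch below assumes that helper is essentially an unsafe.Pointer round-trip (its definition is not part of this diff); the safety of the scheme rests on the pool, not the instruction, owning the addressMode, so the stored address can never outlive its target.

```go
package main

import (
	"fmt"
	"unsafe"
)

type addressMode struct{ imm int64 }

// Assumed shape of wazevoapi.PtrFromUintptr: a typed wrapper around the
// uintptr -> unsafe.Pointer conversion. `go vet` flags this conversion,
// which is one reason to keep it in a single audited helper.
func ptrFromUintptr[T any](u uintptr) *T {
	return (*T)(unsafe.Pointer(u))
}

type instruction struct{ u1 uint64 }

func (i *instruction) setAmode(a *addressMode) {
	i.u1 = uint64(uintptr(unsafe.Pointer(a)))
}

func (i *instruction) getAmode() *addressMode {
	return ptrFromUintptr[addressMode](uintptr(i.u1))
}

func main() {
	pool := []addressMode{{imm: 16}} // stand-in for amodePool keeping the value alive
	var ins instruction
	ins.setAmode(&pool[0])
	fmt.Println(ins.getAmode().imm) // 16
}
```

Dropping the dedicated amode/ra/u3 fields is the point of the surrounding instruction-struct changes: ra now travels as `uint64(ra)` in u2, and the former u3 flags are packed into the high 32 bits of u1 or u2 (the `|= 1 << 32` writes above), shrinking every instruction.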
func (i *instruction) condBrOffsetResolved() bool { - return i.rd.data2 == 1 + return i.rn.data2 == 1 } func (i *instruction) condBrOffset() int64 { - return int64(i.rd.data) + return int64(i.rn.data) } func (i *instruction) condBrCond() cond { @@ -746,33 +762,33 @@ func (i *instruction) condBrCond() cond { } func (i *instruction) condBr64bit() bool { - return i.u3 == 1 + return i.u2&(1<<32) != 0 } func (i *instruction) asLoadFpuConst32(rd regalloc.VReg, raw uint64) { i.kind = loadFpuConst32 i.u1 = raw - i.rd = operandNR(rd) + i.rd = rd } func (i *instruction) asLoadFpuConst64(rd regalloc.VReg, raw uint64) { i.kind = loadFpuConst64 i.u1 = raw - i.rd = operandNR(rd) + i.rd = rd } func (i *instruction) asLoadFpuConst128(rd regalloc.VReg, lo, hi uint64) { i.kind = loadFpuConst128 i.u1 = lo i.u2 = hi - i.rd = operandNR(rd) + i.rd = rd } func (i *instruction) asFpuCmp(rn, rm operand, is64bit bool) { i.kind = fpuCmp i.rn, i.rm = rn, rm if is64bit { - i.u3 = 1 + i.u1 = 1 } } @@ -783,12 +799,12 @@ func (i *instruction) asCCmpImm(rn operand, imm uint64, c condFlag, flag byte, i i.u1 = uint64(c) i.u2 = uint64(flag) if is64bit { - i.u3 = 1 + i.u2 |= 1 << 32 } } // asALU setups a basic ALU instruction. -func (i *instruction) asALU(aluOp aluOp, rd, rn, rm operand, dst64bit bool) { +func (i *instruction) asALU(aluOp aluOp, rd regalloc.VReg, rn, rm operand, dst64bit bool) { switch rm.kind { case operandKindNR: i.kind = aluRRR @@ -804,22 +820,22 @@ func (i *instruction) asALU(aluOp aluOp, rd, rn, rm operand, dst64bit bool) { i.u1 = uint64(aluOp) i.rd, i.rn, i.rm = rd, rn, rm if dst64bit { - i.u3 = 1 + i.u2 |= 1 << 32 } } // asALU setups a basic ALU instruction. -func (i *instruction) asALURRRR(aluOp aluOp, rd, rn, rm, ra operand, dst64bit bool) { +func (i *instruction) asALURRRR(aluOp aluOp, rd regalloc.VReg, rn, rm operand, ra regalloc.VReg, dst64bit bool) { i.kind = aluRRRR i.u1 = uint64(aluOp) - i.rd, i.rn, i.rm, i.ra = rd, rn, rm, ra + i.rd, i.rn, i.rm, i.u2 = rd, rn, rm, uint64(ra) if dst64bit { - i.u3 = 1 + i.u1 |= 1 << 32 } } // asALUShift setups a shift based ALU instruction. -func (i *instruction) asALUShift(aluOp aluOp, rd, rn, rm operand, dst64bit bool) { +func (i *instruction) asALUShift(aluOp aluOp, rd regalloc.VReg, rn, rm operand, dst64bit bool) { switch rm.kind { case operandKindNR: i.kind = aluRRR // If the shift amount op is a register, then the instruction is encoded as a normal ALU instruction with two register operands. 
@@ -831,17 +847,17 @@ func (i *instruction) asALUShift(aluOp aluOp, rd, rn, rm operand, dst64bit bool) i.u1 = uint64(aluOp) i.rd, i.rn, i.rm = rd, rn, rm if dst64bit { - i.u3 = 1 + i.u2 |= 1 << 32 } } func (i *instruction) asALUBitmaskImm(aluOp aluOp, rd, rn regalloc.VReg, imm uint64, dst64bit bool) { i.kind = aluRRBitmaskImm i.u1 = uint64(aluOp) - i.rn, i.rd = operandNR(rn), operandNR(rd) + i.rn, i.rd = operandNR(rn), rd i.u2 = imm if dst64bit { - i.u3 = 1 + i.u1 |= 1 << 32 } } @@ -852,76 +868,76 @@ func (i *instruction) asMovToFPSR(rn regalloc.VReg) { func (i *instruction) asMovFromFPSR(rd regalloc.VReg) { i.kind = movFromFPSR - i.rd = operandNR(rd) + i.rd = rd } func (i *instruction) asBitRR(bitOp bitOp, rd, rn regalloc.VReg, is64bit bool) { i.kind = bitRR - i.rn, i.rd = operandNR(rn), operandNR(rd) + i.rn, i.rd = operandNR(rn), rd i.u1 = uint64(bitOp) if is64bit { i.u2 = 1 } } -func (i *instruction) asFpuRRR(op fpuBinOp, rd, rn, rm operand, dst64bit bool) { +func (i *instruction) asFpuRRR(op fpuBinOp, rd regalloc.VReg, rn, rm operand, dst64bit bool) { i.kind = fpuRRR i.u1 = uint64(op) i.rd, i.rn, i.rm = rd, rn, rm if dst64bit { - i.u3 = 1 + i.u2 = 1 } } -func (i *instruction) asFpuRR(op fpuUniOp, rd, rn operand, dst64bit bool) { +func (i *instruction) asFpuRR(op fpuUniOp, rd regalloc.VReg, rn operand, dst64bit bool) { i.kind = fpuRR i.u1 = uint64(op) i.rd, i.rn = rd, rn if dst64bit { - i.u3 = 1 + i.u2 = 1 } } func (i *instruction) asExtend(rd, rn regalloc.VReg, fromBits, toBits byte, signed bool) { i.kind = extend - i.rn, i.rd = operandNR(rn), operandNR(rd) + i.rn, i.rd = operandNR(rn), rd i.u1 = uint64(fromBits) i.u2 = uint64(toBits) if signed { - i.u3 = 1 + i.u2 |= 1 << 32 } } func (i *instruction) asMove32(rd, rn regalloc.VReg) { i.kind = mov32 - i.rn, i.rd = operandNR(rn), operandNR(rd) + i.rn, i.rd = operandNR(rn), rd } func (i *instruction) asMove64(rd, rn regalloc.VReg) *instruction { i.kind = mov64 - i.rn, i.rd = operandNR(rn), operandNR(rd) + i.rn, i.rd = operandNR(rn), rd return i } func (i *instruction) asFpuMov64(rd, rn regalloc.VReg) { i.kind = fpuMov64 - i.rn, i.rd = operandNR(rn), operandNR(rd) + i.rn, i.rd = operandNR(rn), rd } func (i *instruction) asFpuMov128(rd, rn regalloc.VReg) *instruction { i.kind = fpuMov128 - i.rn, i.rd = operandNR(rn), operandNR(rd) + i.rn, i.rd = operandNR(rn), rd return i } -func (i *instruction) asMovToVec(rd, rn operand, arr vecArrangement, index vecIndex) { +func (i *instruction) asMovToVec(rd regalloc.VReg, rn operand, arr vecArrangement, index vecIndex) { i.kind = movToVec i.rd = rd i.rn = rn i.u1, i.u2 = uint64(arr), uint64(index) } -func (i *instruction) asMovFromVec(rd, rn operand, arr vecArrangement, index vecIndex, signed bool) { +func (i *instruction) asMovFromVec(rd regalloc.VReg, rn operand, arr vecArrangement, index vecIndex, signed bool) { if signed { i.kind = movFromVecSigned } else { @@ -932,48 +948,48 @@ func (i *instruction) asMovFromVec(rd, rn operand, arr vecArrangement, index vec i.u1, i.u2 = uint64(arr), uint64(index) } -func (i *instruction) asVecDup(rd, rn operand, arr vecArrangement) { +func (i *instruction) asVecDup(rd regalloc.VReg, rn operand, arr vecArrangement) { i.kind = vecDup i.u1 = uint64(arr) i.rn, i.rd = rn, rd } -func (i *instruction) asVecDupElement(rd, rn operand, arr vecArrangement, index vecIndex) { +func (i *instruction) asVecDupElement(rd regalloc.VReg, rn operand, arr vecArrangement, index vecIndex) { i.kind = vecDupElement i.u1 = uint64(arr) i.rn, i.rd = rn, rd i.u2 = uint64(index) } -func (i 
*instruction) asVecExtract(rd, rn, rm operand, arr vecArrangement, index uint32) { +func (i *instruction) asVecExtract(rd regalloc.VReg, rn, rm operand, arr vecArrangement, index uint32) { i.kind = vecExtract i.u1 = uint64(arr) i.rn, i.rm, i.rd = rn, rm, rd i.u2 = uint64(index) } -func (i *instruction) asVecMovElement(rd, rn operand, arr vecArrangement, rdIndex, rnIndex vecIndex) { +func (i *instruction) asVecMovElement(rd regalloc.VReg, rn operand, arr vecArrangement, rdIndex, rnIndex vecIndex) { i.kind = vecMovElement i.u1 = uint64(arr) - i.u2, i.u3 = uint64(rdIndex), uint64(rnIndex) + i.u2 = uint64(rdIndex) | uint64(rnIndex)<<32 i.rn, i.rd = rn, rd } -func (i *instruction) asVecMisc(op vecOp, rd, rn operand, arr vecArrangement) { +func (i *instruction) asVecMisc(op vecOp, rd regalloc.VReg, rn operand, arr vecArrangement) { i.kind = vecMisc i.u1 = uint64(op) i.rn, i.rd = rn, rd i.u2 = uint64(arr) } -func (i *instruction) asVecLanes(op vecOp, rd, rn operand, arr vecArrangement) { +func (i *instruction) asVecLanes(op vecOp, rd regalloc.VReg, rn operand, arr vecArrangement) { i.kind = vecLanes i.u1 = uint64(op) i.rn, i.rd = rn, rd i.u2 = uint64(arr) } -func (i *instruction) asVecShiftImm(op vecOp, rd, rn, rm operand, arr vecArrangement) *instruction { +func (i *instruction) asVecShiftImm(op vecOp, rd regalloc.VReg, rn, rm operand, arr vecArrangement) *instruction { i.kind = vecShiftImm i.u1 = uint64(op) i.rn, i.rm, i.rd = rn, rm, rd @@ -981,7 +997,7 @@ func (i *instruction) asVecShiftImm(op vecOp, rd, rn, rm operand, arr vecArrange return i } -func (i *instruction) asVecTbl(nregs byte, rd, rn, rm operand, arr vecArrangement) { +func (i *instruction) asVecTbl(nregs byte, rd regalloc.VReg, rn, rm operand, arr vecArrangement) { switch nregs { case 0, 1: i.kind = vecTbl @@ -1000,14 +1016,14 @@ func (i *instruction) asVecTbl(nregs byte, rd, rn, rm operand, arr vecArrangemen i.u2 = uint64(arr) } -func (i *instruction) asVecPermute(op vecOp, rd, rn, rm operand, arr vecArrangement) { +func (i *instruction) asVecPermute(op vecOp, rd regalloc.VReg, rn, rm operand, arr vecArrangement) { i.kind = vecPermute i.u1 = uint64(op) i.rn, i.rm, i.rd = rn, rm, rd i.u2 = uint64(arr) } -func (i *instruction) asVecRRR(op vecOp, rd, rn, rm operand, arr vecArrangement) *instruction { +func (i *instruction) asVecRRR(op vecOp, rd regalloc.VReg, rn, rm operand, arr vecArrangement) *instruction { i.kind = vecRRR i.u1 = uint64(op) i.rn, i.rd, i.rm = rn, rd, rm @@ -1017,7 +1033,7 @@ func (i *instruction) asVecRRR(op vecOp, rd, rn, rm operand, arr vecArrangement) // asVecRRRRewrite encodes a vector instruction that rewrites the destination register. // IMPORTANT: the destination register must be already defined before this instruction. -func (i *instruction) asVecRRRRewrite(op vecOp, rd, rn, rm operand, arr vecArrangement) { +func (i *instruction) asVecRRRRewrite(op vecOp, rd regalloc.VReg, rn, rm operand, arr vecArrangement) { i.kind = vecRRRRewrite i.u1 = uint64(op) i.rn, i.rd, i.rm = rn, rd, rm @@ -1033,8 +1049,8 @@ func (i *instruction) IsCopy() bool { // String implements fmt.Stringer. 
func (i *instruction) String() (str string) { - is64SizeBitToSize := func(u3 uint64) byte { - if u3 == 0 { + is64SizeBitToSize := func(v uint64) byte { + if v == 0 { return 32 } return 64 @@ -1049,46 +1065,46 @@ func (i *instruction) String() (str string) { str = "nop0" } case aluRRR: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u2 >> 32) str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), - formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), + formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), i.rm.format(size)) case aluRRRR: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u1 >> 32) str = fmt.Sprintf("%s %s, %s, %s, %s", aluOp(i.u1).String(), - formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size), formatVRegSized(i.ra.nr(), size)) + formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size), formatVRegSized(regalloc.VReg(i.u2), size)) case aluRRImm12: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u2 >> 32) str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), - formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), i.rm.format(size)) + formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), i.rm.format(size)) case aluRRBitmaskImm: - size := is64SizeBitToSize(i.u3) - rd, rn := formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size) + size := is64SizeBitToSize(i.u1 >> 32) + rd, rn := formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size) if size == 32 { str = fmt.Sprintf("%s %s, %s, #%#x", aluOp(i.u1).String(), rd, rn, uint32(i.u2)) } else { str = fmt.Sprintf("%s %s, %s, #%#x", aluOp(i.u1).String(), rd, rn, i.u2) } case aluRRImmShift: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u2 >> 32) str = fmt.Sprintf("%s %s, %s, %#x", aluOp(i.u1).String(), - formatVRegSized(i.rd.nr(), size), + formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), i.rm.shiftImm(), ) case aluRRRShift: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u2 >> 32) str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), - formatVRegSized(i.rd.nr(), size), + formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), i.rm.format(size), ) case aluRRRExtend: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u2 >> 32) str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), - formatVRegSized(i.rd.nr(), size), + formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), // Regardless of the source size, the register is formatted in 32-bit. 
i.rm.format(32), @@ -1097,57 +1113,57 @@ func (i *instruction) String() (str string) { size := is64SizeBitToSize(i.u2) str = fmt.Sprintf("%s %s, %s", bitOp(i.u1), - formatVRegSized(i.rd.nr(), size), + formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), ) case uLoad8: - str = fmt.Sprintf("ldrb %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) + str = fmt.Sprintf("ldrb %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32)) case sLoad8: - str = fmt.Sprintf("ldrsb %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) + str = fmt.Sprintf("ldrsb %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32)) case uLoad16: - str = fmt.Sprintf("ldrh %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) + str = fmt.Sprintf("ldrh %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32)) case sLoad16: - str = fmt.Sprintf("ldrsh %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) + str = fmt.Sprintf("ldrsh %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32)) case uLoad32: - str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) + str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32)) case sLoad32: - str = fmt.Sprintf("ldrs %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) + str = fmt.Sprintf("ldrs %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32)) case uLoad64: - str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 64), i.amode.format(64)) + str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 64), i.getAmode().format(64)) case store8: - str = fmt.Sprintf("strb %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(8)) + str = fmt.Sprintf("strb %s, %s", formatVRegSized(i.rn.nr(), 32), i.getAmode().format(8)) case store16: - str = fmt.Sprintf("strh %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(16)) + str = fmt.Sprintf("strh %s, %s", formatVRegSized(i.rn.nr(), 32), i.getAmode().format(16)) case store32: - str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(32)) + str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.getAmode().format(32)) case store64: - str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.amode.format(64)) + str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.getAmode().format(64)) case storeP64: str = fmt.Sprintf("stp %s, %s, %s", - formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.amode.format(64)) + formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.getAmode().format(64)) case loadP64: str = fmt.Sprintf("ldp %s, %s, %s", - formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.amode.format(64)) + formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.getAmode().format(64)) case mov64: str = fmt.Sprintf("mov %s, %s", - formatVRegSized(i.rd.nr(), 64), + formatVRegSized(i.rd, 64), formatVRegSized(i.rn.nr(), 64)) case mov32: - str = fmt.Sprintf("mov %s, %s", formatVRegSized(i.rd.nr(), 32), formatVRegSized(i.rn.nr(), 32)) + str = fmt.Sprintf("mov %s, %s", formatVRegSized(i.rd, 32), formatVRegSized(i.rn.nr(), 32)) case movZ: - size := is64SizeBitToSize(i.u3) - str = fmt.Sprintf("movz %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16) + size := is64SizeBitToSize(i.u2 >> 32) + str = fmt.Sprintf("movz %s, #%#x, lsl %d", formatVRegSized(i.rd, size), uint16(i.u1), uint32(i.u2)*16) case movN: - size := is64SizeBitToSize(i.u3) - str = fmt.Sprintf("movn %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), 
size), uint16(i.u1), i.u2*16) + size := is64SizeBitToSize(i.u2 >> 32) + str = fmt.Sprintf("movn %s, #%#x, lsl %d", formatVRegSized(i.rd, size), uint16(i.u1), uint32(i.u2)*16) case movK: - size := is64SizeBitToSize(i.u3) - str = fmt.Sprintf("movk %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16) + size := is64SizeBitToSize(i.u2 >> 32) + str = fmt.Sprintf("movk %s, #%#x, lsl %d", formatVRegSized(i.rd, size), uint16(i.u1), uint32(i.u2)*16) case extend: fromBits, toBits := byte(i.u1), byte(i.u2) var signedStr string - if i.u3 == 1 { + if i.u2>>32 == 1 { signedStr = "s" } else { signedStr = "u" @@ -1161,39 +1177,39 @@ func (i *instruction) String() (str string) { case 32: fromStr = "w" } - str = fmt.Sprintf("%sxt%s %s, %s", signedStr, fromStr, formatVRegSized(i.rd.nr(), toBits), formatVRegSized(i.rn.nr(), 32)) + str = fmt.Sprintf("%sxt%s %s, %s", signedStr, fromStr, formatVRegSized(i.rd, toBits), formatVRegSized(i.rn.nr(), 32)) case cSel: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u2) str = fmt.Sprintf("csel %s, %s, %s, %s", - formatVRegSized(i.rd.nr(), size), + formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size), condFlag(i.u1), ) case cSet: if i.u2 != 0 { - str = fmt.Sprintf("csetm %s, %s", formatVRegSized(i.rd.nr(), 64), condFlag(i.u1)) + str = fmt.Sprintf("csetm %s, %s", formatVRegSized(i.rd, 64), condFlag(i.u1)) } else { - str = fmt.Sprintf("cset %s, %s", formatVRegSized(i.rd.nr(), 64), condFlag(i.u1)) + str = fmt.Sprintf("cset %s, %s", formatVRegSized(i.rd, 64), condFlag(i.u1)) } case cCmpImm: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u2 >> 32) str = fmt.Sprintf("ccmp %s, #%#x, #%#x, %s", formatVRegSized(i.rn.nr(), size), i.rm.data, i.u2&0b1111, condFlag(i.u1)) case fpuMov64: str = fmt.Sprintf("mov %s, %s", - formatVRegVec(i.rd.nr(), vecArrangement8B, vecIndexNone), + formatVRegVec(i.rd, vecArrangement8B, vecIndexNone), formatVRegVec(i.rn.nr(), vecArrangement8B, vecIndexNone)) case fpuMov128: str = fmt.Sprintf("mov %s, %s", - formatVRegVec(i.rd.nr(), vecArrangement16B, vecIndexNone), + formatVRegVec(i.rd, vecArrangement16B, vecIndexNone), formatVRegVec(i.rn.nr(), vecArrangement16B, vecIndexNone)) case fpuMovFromVec: panic("TODO") case fpuRR: - dstSz := is64SizeBitToSize(i.u3) + dstSz := is64SizeBitToSize(i.u2) srcSz := dstSz op := fpuUniOp(i.u1) switch op { @@ -1203,38 +1219,38 @@ func (i *instruction) String() (str string) { srcSz = 64 } str = fmt.Sprintf("%s %s, %s", op.String(), - formatVRegSized(i.rd.nr(), dstSz), formatVRegSized(i.rn.nr(), srcSz)) + formatVRegSized(i.rd, dstSz), formatVRegSized(i.rn.nr(), srcSz)) case fpuRRR: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u2) str = fmt.Sprintf("%s %s, %s, %s", fpuBinOp(i.u1).String(), - formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size)) + formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size)) case fpuRRI: panic("TODO") case fpuRRRR: panic("TODO") case fpuCmp: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u1) str = fmt.Sprintf("fcmp %s, %s", formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size)) case fpuLoad32: - str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) + str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32)) case fpuStore32: - str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(64)) + 
str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.getAmode().format(64)) case fpuLoad64: - str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 64), i.amode.format(64)) + str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 64), i.getAmode().format(64)) case fpuStore64: - str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.amode.format(64)) + str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.getAmode().format(64)) case fpuLoad128: - str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 128), i.amode.format(64)) + str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 128), i.getAmode().format(64)) case fpuStore128: - str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 128), i.amode.format(64)) + str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 128), i.getAmode().format(64)) case loadFpuConst32: - str = fmt.Sprintf("ldr %s, #8; b 8; data.f32 %f", formatVRegSized(i.rd.nr(), 32), math.Float32frombits(uint32(i.u1))) + str = fmt.Sprintf("ldr %s, #8; b 8; data.f32 %f", formatVRegSized(i.rd, 32), math.Float32frombits(uint32(i.u1))) case loadFpuConst64: - str = fmt.Sprintf("ldr %s, #8; b 16; data.f64 %f", formatVRegSized(i.rd.nr(), 64), math.Float64frombits(i.u1)) + str = fmt.Sprintf("ldr %s, #8; b 16; data.f64 %f", formatVRegSized(i.rd, 64), math.Float64frombits(i.u1)) case loadFpuConst128: str = fmt.Sprintf("ldr %s, #8; b 32; data.v128 %016x %016x", - formatVRegSized(i.rd.nr(), 128), i.u1, i.u2) + formatVRegSized(i.rd, 128), i.u1, i.u2) case fpuToInt: var op, src, dst string if signed := i.u1 == 1; signed { @@ -1242,15 +1258,15 @@ func (i *instruction) String() (str string) { } else { op = "fcvtzu" } - if src64 := i.u2 == 1; src64 { + if src64 := i.u2&1 != 0; src64 { src = formatVRegWidthVec(i.rn.nr(), vecArrangementD) } else { src = formatVRegWidthVec(i.rn.nr(), vecArrangementS) } - if dst64 := i.u3 == 1; dst64 { - dst = formatVRegSized(i.rd.nr(), 64) + if dst64 := i.u2&2 != 0; dst64 { + dst = formatVRegSized(i.rd, 64) } else { - dst = formatVRegSized(i.rd.nr(), 32) + dst = formatVRegSized(i.rd, 32) } str = fmt.Sprintf("%s %s, %s", op, dst, src) @@ -1261,21 +1277,21 @@ func (i *instruction) String() (str string) { } else { op = "ucvtf" } - if src64 := i.u2 == 1; src64 { + if src64 := i.u2&1 != 0; src64 { src = formatVRegSized(i.rn.nr(), 64) } else { src = formatVRegSized(i.rn.nr(), 32) } - if dst64 := i.u3 == 1; dst64 { - dst = formatVRegWidthVec(i.rd.nr(), vecArrangementD) + if dst64 := i.u2&2 != 0; dst64 { + dst = formatVRegWidthVec(i.rd, vecArrangementD) } else { - dst = formatVRegWidthVec(i.rd.nr(), vecArrangementS) + dst = formatVRegWidthVec(i.rd, vecArrangementS) } str = fmt.Sprintf("%s %s, %s", op, dst, src) case fpuCSel: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u2) str = fmt.Sprintf("fcsel %s, %s, %s, %s", - formatVRegSized(i.rd.nr(), size), + formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size), condFlag(i.u1), @@ -1291,7 +1307,7 @@ func (i *instruction) String() (str string) { default: panic("unsupported arrangement " + arr.String()) } - str = fmt.Sprintf("ins %s, %s", formatVRegVec(i.rd.nr(), arr, vecIndex(i.u2)), formatVRegSized(i.rn.nr(), size)) + str = fmt.Sprintf("ins %s, %s", formatVRegVec(i.rd, arr, vecIndex(i.u2)), formatVRegSized(i.rn.nr(), size)) case movFromVec, movFromVecSigned: var size byte var opcode string @@ -1315,23 +1331,23 @@ func (i *instruction) String() (str string) { default: panic("unsupported arrangement " + arr.String()) 
} - str = fmt.Sprintf("%s %s, %s", opcode, formatVRegSized(i.rd.nr(), size), formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2))) + str = fmt.Sprintf("%s %s, %s", opcode, formatVRegSized(i.rd, size), formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2))) case vecDup: str = fmt.Sprintf("dup %s, %s", - formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone), + formatVRegVec(i.rd, vecArrangement(i.u1), vecIndexNone), formatVRegSized(i.rn.nr(), 64), ) case vecDupElement: arr := vecArrangement(i.u1) str = fmt.Sprintf("dup %s, %s", - formatVRegVec(i.rd.nr(), arr, vecIndexNone), + formatVRegVec(i.rd, arr, vecIndexNone), formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2)), ) case vecDupFromFpu: panic("TODO") case vecExtract: str = fmt.Sprintf("ext %s, %s, %s, #%d", - formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone), + formatVRegVec(i.rd, vecArrangement(i.u1), vecIndexNone), formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndexNone), formatVRegVec(i.rm.nr(), vecArrangement(i.u1), vecIndexNone), uint32(i.u2), @@ -1340,15 +1356,15 @@ func (i *instruction) String() (str string) { panic("TODO") case vecMovElement: str = fmt.Sprintf("mov %s, %s", - formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndex(i.u2)), - formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndex(i.u3)), + formatVRegVec(i.rd, vecArrangement(i.u1), vecIndex(i.u2&0xffffffff)), + formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndex(i.u2>>32)), ) case vecMiscNarrow: panic("TODO") case vecRRR, vecRRRRewrite: str = fmt.Sprintf("%s %s, %s, %s", vecOp(i.u1), - formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone), + formatVRegVec(i.rd, vecArrangement(i.u2), vecIndexNone), formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone), formatVRegVec(i.rm.nr(), vecArrangement(i.u2), vecIndexNone), ) @@ -1356,12 +1372,12 @@ func (i *instruction) String() (str string) { vop := vecOp(i.u1) if vop == vecOpCmeq0 { str = fmt.Sprintf("cmeq %s, %s, #0", - formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone), + formatVRegVec(i.rd, vecArrangement(i.u2), vecIndexNone), formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone)) } else { str = fmt.Sprintf("%s %s, %s", vop, - formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone), + formatVRegVec(i.rd, vecArrangement(i.u2), vecIndexNone), formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone)) } case vecLanes: @@ -1379,24 +1395,24 @@ func (i *instruction) String() (str string) { } str = fmt.Sprintf("%s %s, %s", vecOp(i.u1), - formatVRegWidthVec(i.rd.nr(), destArr), + formatVRegWidthVec(i.rd, destArr), formatVRegVec(i.rn.nr(), arr, vecIndexNone)) case vecShiftImm: arr := vecArrangement(i.u2) str = fmt.Sprintf("%s %s, %s, #%d", vecOp(i.u1), - formatVRegVec(i.rd.nr(), arr, vecIndexNone), + formatVRegVec(i.rd, arr, vecIndexNone), formatVRegVec(i.rn.nr(), arr, vecIndexNone), i.rm.shiftImm()) case vecTbl: arr := vecArrangement(i.u2) str = fmt.Sprintf("tbl %s, { %s }, %s", - formatVRegVec(i.rd.nr(), arr, vecIndexNone), + formatVRegVec(i.rd, arr, vecIndexNone), formatVRegVec(i.rn.nr(), vecArrangement16B, vecIndexNone), formatVRegVec(i.rm.nr(), arr, vecIndexNone)) case vecTbl2: arr := vecArrangement(i.u2) - rd, rn, rm := i.rd.nr(), i.rn.nr(), i.rm.nr() + rd, rn, rm := i.rd, i.rn.nr(), i.rm.nr() rn1 := regalloc.FromRealReg(rn.RealReg()+1, rn.RegType()) str = fmt.Sprintf("tbl %s, { %s, %s }, %s", formatVRegVec(rd, arr, vecIndexNone), @@ -1407,13 +1423,13 @@ func (i *instruction) String() (str string) { arr := vecArrangement(i.u2) str = fmt.Sprintf("%s %s, %s, %s", vecOp(i.u1), - 
formatVRegVec(i.rd.nr(), arr, vecIndexNone), + formatVRegVec(i.rd, arr, vecIndexNone), formatVRegVec(i.rn.nr(), arr, vecIndexNone), formatVRegVec(i.rm.nr(), arr, vecIndexNone)) case movToFPSR: str = fmt.Sprintf("msr fpsr, %s", formatVRegSized(i.rn.nr(), 64)) case movFromFPSR: - str = fmt.Sprintf("mrs %s fpsr", formatVRegSized(i.rd.nr(), 64)) + str = fmt.Sprintf("mrs %s fpsr", formatVRegSized(i.rd, 64)) case call: str = fmt.Sprintf("bl %s", ssa.FuncRef(i.u1)) case callInd: @@ -1422,15 +1438,15 @@ func (i *instruction) String() (str string) { str = "ret" case br: target := label(i.u1) - if i.u3 != 0 { + if i.rm.data != 0 { str = fmt.Sprintf("b #%#x (%s)", i.brOffset(), target.String()) } else { str = fmt.Sprintf("b %s", target.String()) } case condBr: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u2 >> 32) c := cond(i.u1) - target := label(i.u2) + target := label(i.u2 & 0xffffffff) switch c.kind() { case condKindRegisterZero: if !i.condBrOffsetResolved() { @@ -1456,7 +1472,7 @@ func (i *instruction) String() (str string) { } } case adr: - str = fmt.Sprintf("adr %s, #%#x", formatVRegSized(i.rd.nr(), 64), int64(i.u1)) + str = fmt.Sprintf("adr %s, #%#x", formatVRegSized(i.rd, 64), int64(i.u1)) case brTableSequence: targetIndex := i.u1 str = fmt.Sprintf("br_table_sequence %s, table_index=%d", formatVRegSized(i.rn.nr(), 64), targetIndex) @@ -1473,7 +1489,7 @@ func (i *instruction) String() (str string) { case 1: m = m + "b" } - str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), 64)) + str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), 64)) case atomicCas: m := "casal" size := byte(32) @@ -1485,7 +1501,7 @@ func (i *instruction) String() (str string) { case 1: m = m + "b" } - str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rn.nr(), 64)) + str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rd, size), formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rn.nr(), 64)) case atomicLoad: m := "ldar" size := byte(32) @@ -1497,7 +1513,7 @@ func (i *instruction) String() (str string) { case 1: m = m + "b" } - str = fmt.Sprintf("%s %s, %s", m, formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), 64)) + str = fmt.Sprintf("%s %s, %s", m, formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), 64)) case atomicStore: m := "stlr" size := byte(32) @@ -1517,9 +1533,9 @@ func (i *instruction) String() (str string) { case emitSourceOffsetInfo: str = fmt.Sprintf("source_offset_info %d", ssa.SourceOffset(i.u1)) case vecLoad1R: - str = fmt.Sprintf("ld1r {%s}, [%s]", formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone), formatVRegSized(i.rn.nr(), 64)) + str = fmt.Sprintf("ld1r {%s}, [%s]", formatVRegVec(i.rd, vecArrangement(i.u1), vecIndexNone), formatVRegSized(i.rn.nr(), 64)) case loadConstBlockArg: - str = fmt.Sprintf("load_const_block_arg %s, %#x", formatVRegSized(i.rd.nr(), 64), i.u1) + str = fmt.Sprintf("load_const_block_arg %s, %#x", formatVRegSized(i.rd, 64), i.u1) default: panic(i.kind) } @@ -1528,26 +1544,26 @@ func (i *instruction) String() (str string) { func (i *instruction) asAdr(rd regalloc.VReg, offset int64) { i.kind = adr - i.rd = operandNR(rd) + i.rd = rd i.u1 = uint64(offset) } -func (i *instruction) asAtomicRmw(op atomicRmwOp, rn, rs, rt operand, size uint64) { +func (i *instruction) asAtomicRmw(op atomicRmwOp, rn, rs, rt 
regalloc.VReg, size uint64) { i.kind = atomicRmw - i.rd, i.rn, i.rm = rt, rn, rs + i.rd, i.rn, i.rm = rt, operandNR(rn), operandNR(rs) i.u1 = uint64(op) i.u2 = size } -func (i *instruction) asAtomicCas(rn, rs, rt operand, size uint64) { +func (i *instruction) asAtomicCas(rn, rs, rt regalloc.VReg, size uint64) { i.kind = atomicCas - i.rm, i.rn, i.rd = rt, rn, rs + i.rm, i.rn, i.rd = operandNR(rt), operandNR(rn), rs i.u2 = size } -func (i *instruction) asAtomicLoad(rn, rt operand, size uint64) { +func (i *instruction) asAtomicLoad(rn, rt regalloc.VReg, size uint64) { i.kind = atomicLoad - i.rn, i.rd = rn, rt + i.rn, i.rd = operandNR(rn), rt i.u2 = size } @@ -1755,12 +1771,12 @@ func (i *instruction) asLoadConstBlockArg(v uint64, typ ssa.Type, dst regalloc.V i.kind = loadConstBlockArg i.u1 = v i.u2 = uint64(typ) - i.rd = operandNR(dst) + i.rd = dst return i } func (i *instruction) loadConstBlockArgData() (v uint64, typ ssa.Type, dst regalloc.VReg) { - return i.u1, ssa.Type(i.u2), i.rd.nr() + return i.u1, ssa.Type(i.u2), i.rd } func (i *instruction) asEmitSourceOffsetInfo(l ssa.SourceOffset) *instruction { @@ -1778,7 +1794,7 @@ func (i *instruction) asUDF() *instruction { return i } -func (i *instruction) asFpuToInt(rd, rn operand, rdSigned, src64bit, dst64bit bool) { +func (i *instruction) asFpuToInt(rd regalloc.VReg, rn operand, rdSigned, src64bit, dst64bit bool) { i.kind = fpuToInt i.rn = rn i.rd = rd @@ -1789,11 +1805,11 @@ func (i *instruction) asFpuToInt(rd, rn operand, rdSigned, src64bit, dst64bit bo i.u2 = 1 } if dst64bit { - i.u3 = 1 + i.u2 |= 2 } } -func (i *instruction) asIntToFpu(rd, rn operand, rnSigned, src64bit, dst64bit bool) { +func (i *instruction) asIntToFpu(rd regalloc.VReg, rn operand, rnSigned, src64bit, dst64bit bool) { i.kind = intToFpu i.rn = rn i.rd = rd @@ -1804,7 +1820,7 @@ func (i *instruction) asIntToFpu(rd, rn operand, rnSigned, src64bit, dst64bit bo i.u2 = 1 } if dst64bit { - i.u3 = 1 + i.u2 |= 2 } } @@ -1817,7 +1833,7 @@ func (i *instruction) asExitSequence(ctx regalloc.VReg) *instruction { // aluOp determines the type of ALU operation. Instructions whose kind is one of // aluRRR, aluRRRR, aluRRImm12, aluRRBitmaskImm, aluRRImmShift, aluRRRShift and aluRRRExtend // would use this type. 
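(Illustrative note: a recurring pattern in the hunks above is that the former u3 payload word is folded into spare bits of u1/u2 — asFpuToInt/asIntToFpu now keep src64bit in bit 0 and dst64bit in bit 1 of u2 (decoded later as u2&1 and u2&2), condBr keeps the target label in the low 32 bits of u2 and the 64-bit-size flag in the high half, and destinations become bare regalloc.VReg values instead of operands. The Go sketch below only mirrors that packing convention under hypothetical names; it is not code from this package.)

package main

import "fmt"

// payload mimics the two 64-bit scratch words an instruction keeps once the
// separate u3 field is removed. Field and method names here are hypothetical.
type payload struct {
	u1, u2 uint64
}

// setCnvFlags follows the asFpuToInt/asIntToFpu scheme: src64bit in bit 0 of
// u2, dst64bit in bit 1 (read back in encodeCnvBetweenFloatInt as u2&1, u2&2).
func (p *payload) setCnvFlags(src64bit, dst64bit bool) {
	p.u2 = 0
	if src64bit {
		p.u2 |= 1
	}
	if dst64bit {
		p.u2 |= 2
	}
}

// setCondBr packs the branch target into the low 32 bits of u2 and the
// 64-bit-size flag into the high 32 bits, matching the new condBr case.
func (p *payload) setCondBr(target uint32, is64 bool) {
	p.u2 = uint64(target)
	if is64 {
		p.u2 |= 1 << 32
	}
}

// condBr decodes what setCondBr packed.
func (p *payload) condBr() (target uint32, is64 bool) {
	return uint32(p.u2 & 0xffffffff), p.u2>>32 == 1
}

func main() {
	var p payload
	p.setCondBr(42, true)
	t, is64 := p.condBr()
	fmt.Println(t, is64) // prints: 42 true
	p.setCnvFlags(true, false)
	fmt.Println(p.u2&1 != 0, p.u2&2 != 0) // prints: true false
}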
-type aluOp int +type aluOp uint32 func (a aluOp) String() string { switch a { diff --git a/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go b/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go index 227a964741..f0ede2d6aa 100644 --- a/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go +++ b/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go @@ -44,12 +44,12 @@ func (i *instruction) encode(m *machine) { case callInd: c.Emit4Bytes(encodeUnconditionalBranchReg(regNumberInEncoding[i.rn.realReg()], true)) case store8, store16, store32, store64, fpuStore32, fpuStore64, fpuStore128: - c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rn.realReg()], i.amode)) + c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rn.realReg()], *i.getAmode())) case uLoad8, uLoad16, uLoad32, uLoad64, sLoad8, sLoad16, sLoad32, fpuLoad32, fpuLoad64, fpuLoad128: - c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rd.realReg()], i.amode)) + c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rd.RealReg()], *i.getAmode())) case vecLoad1R: c.Emit4Bytes(encodeVecLoad1R( - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], vecArrangement(i.u1))) case condBr: @@ -75,22 +75,22 @@ func (i *instruction) encode(m *machine) { panic("BUG") } case movN: - c.Emit4Bytes(encodeMoveWideImmediate(0b00, regNumberInEncoding[i.rd.realReg()], i.u1, i.u2, i.u3)) + c.Emit4Bytes(encodeMoveWideImmediate(0b00, regNumberInEncoding[i.rd.RealReg()], i.u1, uint32(i.u2), uint32(i.u2>>32))) case movZ: - c.Emit4Bytes(encodeMoveWideImmediate(0b10, regNumberInEncoding[i.rd.realReg()], i.u1, i.u2, i.u3)) + c.Emit4Bytes(encodeMoveWideImmediate(0b10, regNumberInEncoding[i.rd.RealReg()], i.u1, uint32(i.u2), uint32(i.u2>>32))) case movK: - c.Emit4Bytes(encodeMoveWideImmediate(0b11, regNumberInEncoding[i.rd.realReg()], i.u1, i.u2, i.u3)) + c.Emit4Bytes(encodeMoveWideImmediate(0b11, regNumberInEncoding[i.rd.RealReg()], i.u1, uint32(i.u2), uint32(i.u2>>32))) case mov32: - to, from := i.rd.realReg(), i.rn.realReg() + to, from := i.rd.RealReg(), i.rn.realReg() c.Emit4Bytes(encodeAsMov32(regNumberInEncoding[from], regNumberInEncoding[to])) case mov64: - to, from := i.rd.realReg(), i.rn.realReg() + to, from := i.rd.RealReg(), i.rn.realReg() toIsSp := to == sp fromIsSp := from == sp c.Emit4Bytes(encodeMov64(regNumberInEncoding[to], regNumberInEncoding[from], toIsSp, fromIsSp)) case loadP64, storeP64: rt, rt2 := regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()] - amode := i.amode + amode := i.getAmode() rn := regNumberInEncoding[amode.rn.RealReg()] var pre bool switch amode.kind { @@ -102,21 +102,21 @@ func (i *instruction) encode(m *machine) { } c.Emit4Bytes(encodePreOrPostIndexLoadStorePair64(pre, kind == loadP64, rn, rt, rt2, amode.imm)) case loadFpuConst32: - rd := regNumberInEncoding[i.rd.realReg()] + rd := regNumberInEncoding[i.rd.RealReg()] if i.u1 == 0 { c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement8B)) } else { encodeLoadFpuConst32(c, rd, i.u1) } case loadFpuConst64: - rd := regNumberInEncoding[i.rd.realReg()] + rd := regNumberInEncoding[i.rd.RealReg()] if i.u1 == 0 { c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement8B)) } else { - encodeLoadFpuConst64(c, regNumberInEncoding[i.rd.realReg()], i.u1) + encodeLoadFpuConst64(c, regNumberInEncoding[i.rd.RealReg()], i.u1) } case loadFpuConst128: - rd := regNumberInEncoding[i.rd.realReg()] + rd := 
regNumberInEncoding[i.rd.RealReg()] lo, hi := i.u1, i.u2 if lo == 0 && hi == 0 { c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement16B)) @@ -126,35 +126,35 @@ func (i *instruction) encode(m *machine) { case aluRRRR: c.Emit4Bytes(encodeAluRRRR( aluOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()], - regNumberInEncoding[i.ra.realReg()], - uint32(i.u3), + regNumberInEncoding[regalloc.VReg(i.u2).RealReg()], + uint32(i.u1>>32), )) case aluRRImmShift: c.Emit4Bytes(encodeAluRRImm( aluOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], uint32(i.rm.shiftImm()), - uint32(i.u3), + uint32(i.u2>>32), )) case aluRRR: rn := i.rn.realReg() c.Emit4Bytes(encodeAluRRR( aluOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[rn], regNumberInEncoding[i.rm.realReg()], - i.u3 == 1, + i.u2>>32 == 1, rn == sp, )) case aluRRRExtend: rm, exo, to := i.rm.er() c.Emit4Bytes(encodeAluRRRExtend( aluOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[rm.RealReg()], exo, @@ -164,25 +164,25 @@ func (i *instruction) encode(m *machine) { r, amt, sop := i.rm.sr() c.Emit4Bytes(encodeAluRRRShift( aluOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[r.RealReg()], uint32(amt), sop, - i.u3 == 1, + i.u2>>32 == 1, )) case aluRRBitmaskImm: c.Emit4Bytes(encodeAluBitmaskImmediate( aluOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], i.u2, - i.u3 == 1, + i.u1>>32 == 1, )) case bitRR: c.Emit4Bytes(encodeBitRR( bitOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], uint32(i.u2)), ) @@ -190,22 +190,22 @@ func (i *instruction) encode(m *machine) { imm12, shift := i.rm.imm12() c.Emit4Bytes(encodeAluRRImm12( aluOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], imm12, shift, - i.u3 == 1, + i.u2>>32 == 1, )) case fpuRRR: c.Emit4Bytes(encodeFpuRRR( fpuBinOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()], - i.u3 == 1, + i.u2 == 1, )) case fpuMov64, fpuMov128: // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/MOV--vector---Move-vector--an-alias-of-ORR--vector--register-- - rd := regNumberInEncoding[i.rd.realReg()] + rd := regNumberInEncoding[i.rd.RealReg()] rn := regNumberInEncoding[i.rn.realReg()] var q uint32 if kind == fpuMov128 { @@ -213,7 +213,7 @@ func (i *instruction) encode(m *machine) { } c.Emit4Bytes(q<<30 | 0b1110101<<21 | rn<<16 | 0b000111<<10 | rn<<5 | rd) case cSet: - rd := regNumberInEncoding[i.rd.realReg()] + rd := regNumberInEncoding[i.rd.RealReg()] cf := condFlag(i.u1) if i.u2 == 1 { // https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/CSETM--Conditional-Set-Mask--an-alias-of-CSINV- @@ -225,12 +225,12 @@ func (i *instruction) encode(m *machine) { c.Emit4Bytes(0b1001101010011111<<16 | uint32(cf.invert())<<12 | 0b111111<<5 | rd) } case extend: - c.Emit4Bytes(encodeExtend(i.u3 == 1, byte(i.u1), byte(i.u2), 
regNumberInEncoding[i.rd.realReg()], regNumberInEncoding[i.rn.realReg()])) + c.Emit4Bytes(encodeExtend((i.u2>>32) == 1, byte(i.u1), byte(i.u2), regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()])) case fpuCmp: // https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/FCMP--Floating-point-quiet-Compare--scalar--?lang=en rn, rm := regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()] var ftype uint32 - if i.u3 == 1 { + if i.u1 == 1 { ftype = 0b01 // double precision. } c.Emit4Bytes(0b1111<<25 | ftype<<22 | 1<<21 | rm<<16 | 0b1<<13 | rn<<5) @@ -242,34 +242,34 @@ func (i *instruction) encode(m *machine) { c.Emit4Bytes(0) } case adr: - c.Emit4Bytes(encodeAdr(regNumberInEncoding[i.rd.realReg()], uint32(i.u1))) + c.Emit4Bytes(encodeAdr(regNumberInEncoding[i.rd.RealReg()], uint32(i.u1))) case cSel: c.Emit4Bytes(encodeConditionalSelect( kind, - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()], condFlag(i.u1), - i.u3 == 1, + i.u2 == 1, )) case fpuCSel: c.Emit4Bytes(encodeFpuCSel( - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()], condFlag(i.u1), - i.u3 == 1, + i.u2 == 1, )) case movToVec: c.Emit4Bytes(encodeMoveToVec( - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], vecArrangement(byte(i.u1)), vecIndex(i.u2), )) case movFromVec, movFromVecSigned: c.Emit4Bytes(encodeMoveFromVec( - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], vecArrangement(byte(i.u1)), vecIndex(i.u2), @@ -277,18 +277,18 @@ func (i *instruction) encode(m *machine) { )) case vecDup: c.Emit4Bytes(encodeVecDup( - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], vecArrangement(byte(i.u1)))) case vecDupElement: c.Emit4Bytes(encodeVecDupElement( - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], vecArrangement(byte(i.u1)), vecIndex(i.u2))) case vecExtract: c.Emit4Bytes(encodeVecExtract( - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()], vecArrangement(byte(i.u1)), @@ -296,35 +296,35 @@ func (i *instruction) encode(m *machine) { case vecPermute: c.Emit4Bytes(encodeVecPermute( vecOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()], vecArrangement(byte(i.u2)))) case vecMovElement: c.Emit4Bytes(encodeVecMovElement( - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], vecArrangement(i.u1), - uint32(i.u2), uint32(i.u3), + uint32(i.u2), uint32(i.u2>>32), )) case vecMisc: c.Emit4Bytes(encodeAdvancedSIMDTwoMisc( vecOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], vecArrangement(i.u2), )) case vecLanes: c.Emit4Bytes(encodeVecLanes( vecOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], vecArrangement(i.u2), )) case vecShiftImm: c.Emit4Bytes(encodeVecShiftImm( vecOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + 
regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], uint32(i.rm.shiftImm()), vecArrangement(i.u2), @@ -332,7 +332,7 @@ func (i *instruction) encode(m *machine) { case vecTbl: c.Emit4Bytes(encodeVecTbl( 1, - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()], vecArrangement(i.u2)), @@ -340,7 +340,7 @@ func (i *instruction) encode(m *machine) { case vecTbl2: c.Emit4Bytes(encodeVecTbl( 2, - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()], vecArrangement(i.u2)), @@ -353,9 +353,9 @@ func (i *instruction) encode(m *machine) { case fpuRR: c.Emit4Bytes(encodeFloatDataOneSource( fpuUniOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], - i.u3 == 1, + i.u2 == 1, )) case vecRRR: if op := vecOp(i.u1); op == vecOpBsl || op == vecOpBit || op == vecOpUmlal { @@ -365,14 +365,14 @@ func (i *instruction) encode(m *machine) { case vecRRRRewrite: c.Emit4Bytes(encodeVecRRR( vecOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()], vecArrangement(i.u2), )) case cCmpImm: // Conditional compare (immediate) in https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en - sf := uint32(i.u3 & 0b1) + sf := uint32((i.u2 >> 32) & 0b1) nzcv := uint32(i.u2 & 0b1111) cond := uint32(condFlag(i.u1)) imm := uint32(i.rm.data & 0b11111) @@ -381,7 +381,7 @@ func (i *instruction) encode(m *machine) { sf<<31 | 0b111101001<<22 | imm<<16 | cond<<12 | 0b1<<11 | rn<<5 | nzcv, ) case movFromFPSR: - rt := regNumberInEncoding[i.rd.realReg()] + rt := regNumberInEncoding[i.rd.RealReg()] c.Emit4Bytes(encodeSystemRegisterMove(rt, true)) case movToFPSR: rt := regNumberInEncoding[i.rn.realReg()] @@ -390,13 +390,13 @@ func (i *instruction) encode(m *machine) { c.Emit4Bytes(encodeAtomicRmw( atomicRmwOp(i.u1), regNumberInEncoding[i.rm.realReg()], - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], uint32(i.u2), )) case atomicCas: c.Emit4Bytes(encodeAtomicCas( - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rm.realReg()], regNumberInEncoding[i.rn.realReg()], uint32(i.u2), @@ -404,7 +404,7 @@ func (i *instruction) encode(m *machine) { case atomicLoad: c.Emit4Bytes(encodeAtomicLoadStore( regNumberInEncoding[i.rn.realReg()], - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], uint32(i.u2), 1, )) @@ -810,7 +810,7 @@ func encodeFloatDataOneSource(op fpuUniOp, rd, rn uint32, dst64bit bool) uint32 // encodeCnvBetweenFloatInt encodes as "Conversion between floating-point and integer" in // https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en func encodeCnvBetweenFloatInt(i *instruction) uint32 { - rd := regNumberInEncoding[i.rd.realReg()] + rd := regNumberInEncoding[i.rd.RealReg()] rn := regNumberInEncoding[i.rn.realReg()] var opcode uint32 @@ -822,8 +822,8 @@ func encodeCnvBetweenFloatInt(i *instruction) uint32 { rmode = 0b00 signed := i.u1 == 1 - src64bit := i.u2 == 1 - dst64bit := i.u3 == 1 + src64bit := i.u2&1 != 0 + dst64bit := i.u2&2 != 0 if signed { opcode = 0b010 } else { @@ 
-841,8 +841,8 @@ func encodeCnvBetweenFloatInt(i *instruction) uint32 { rmode = 0b11 signed := i.u1 == 1 - src64bit := i.u2 == 1 - dst64bit := i.u3 == 1 + src64bit := i.u2&1 != 0 + dst64bit := i.u2&2 != 0 if signed { opcode = 0b000 @@ -1787,13 +1787,13 @@ func encodeCBZCBNZ(rt uint32, nz bool, imm19 uint32, _64bit bool) (ret uint32) { // https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Immediate?lang=en // // "shift" must have been divided by 16 at this point. -func encodeMoveWideImmediate(opc uint32, rd uint32, imm, shift, _64bit uint64) (ret uint32) { +func encodeMoveWideImmediate(opc uint32, rd uint32, imm uint64, shift, _64bit uint32) (ret uint32) { ret = rd ret |= uint32(imm&0xffff) << 5 - ret |= (uint32(shift)) << 21 + ret |= (shift) << 21 ret |= 0b100101 << 23 ret |= opc << 29 - ret |= uint32(_64bit) << 31 + ret |= _64bit << 31 return } diff --git a/internal/engine/wazevo/backend/isa/arm64/instr_encoding_test.go b/internal/engine/wazevo/backend/isa/arm64/instr_encoding_test.go index e60a469cdb..d0c171f623 100644 --- a/internal/engine/wazevo/backend/isa/arm64/instr_encoding_test.go +++ b/internal/engine/wazevo/backend/isa/arm64/instr_encoding_test.go @@ -17,6 +17,7 @@ func Test_dummy(t *testing.T) { } func TestInstruction_encode(t *testing.T) { + m := NewBackend().(*machine) dummyLabel := label(1) for _, tc := range []struct { setup func(*instruction) @@ -27,1058 +28,1058 @@ func TestInstruction_encode(t *testing.T) { {want: "21443bd5", setup: func(i *instruction) { i.asMovFromFPSR(x1VReg) }}, {want: "2f08417a", setup: func(i *instruction) { i.asCCmpImm(operandNR(x1VReg), 1, eq, 0b1111, false) }}, {want: "201841fa", setup: func(i *instruction) { i.asCCmpImm(operandNR(x1VReg), 1, ne, 0, true) }}, - {want: "410c010e", setup: func(i *instruction) { i.asVecDup(operandNR(v1VReg), operandNR(v2VReg), vecArrangement8B) }}, - {want: "410c014e", setup: func(i *instruction) { i.asVecDup(operandNR(v1VReg), operandNR(v2VReg), vecArrangement16B) }}, - {want: "410c020e", setup: func(i *instruction) { i.asVecDup(operandNR(v1VReg), operandNR(v2VReg), vecArrangement4H) }}, - {want: "410c024e", setup: func(i *instruction) { i.asVecDup(operandNR(v1VReg), operandNR(v2VReg), vecArrangement8H) }}, - {want: "410c040e", setup: func(i *instruction) { i.asVecDup(operandNR(v1VReg), operandNR(v2VReg), vecArrangement2S) }}, - {want: "410c044e", setup: func(i *instruction) { i.asVecDup(operandNR(v1VReg), operandNR(v2VReg), vecArrangement4S) }}, - {want: "410c084e", setup: func(i *instruction) { i.asVecDup(operandNR(v1VReg), operandNR(v2VReg), vecArrangement2D) }}, - {want: "4104034e", setup: func(i *instruction) { i.asVecDupElement(operandNR(v1VReg), operandNR(v2VReg), vecArrangementB, 1) }}, - {want: "4104064e", setup: func(i *instruction) { i.asVecDupElement(operandNR(v1VReg), operandNR(v2VReg), vecArrangementH, 1) }}, - {want: "41040c4e", setup: func(i *instruction) { i.asVecDupElement(operandNR(v1VReg), operandNR(v2VReg), vecArrangementS, 1) }}, - {want: "4104184e", setup: func(i *instruction) { i.asVecDupElement(operandNR(v1VReg), operandNR(v2VReg), vecArrangementD, 1) }}, + {want: "410c010e", setup: func(i *instruction) { i.asVecDup(v1VReg, operandNR(v2VReg), vecArrangement8B) }}, + {want: "410c014e", setup: func(i *instruction) { i.asVecDup(v1VReg, operandNR(v2VReg), vecArrangement16B) }}, + {want: "410c020e", setup: func(i *instruction) { i.asVecDup(v1VReg, operandNR(v2VReg), vecArrangement4H) }}, + {want: "410c024e", setup: func(i *instruction) { 
i.asVecDup(v1VReg, operandNR(v2VReg), vecArrangement8H) }}, + {want: "410c040e", setup: func(i *instruction) { i.asVecDup(v1VReg, operandNR(v2VReg), vecArrangement2S) }}, + {want: "410c044e", setup: func(i *instruction) { i.asVecDup(v1VReg, operandNR(v2VReg), vecArrangement4S) }}, + {want: "410c084e", setup: func(i *instruction) { i.asVecDup(v1VReg, operandNR(v2VReg), vecArrangement2D) }}, + {want: "4104034e", setup: func(i *instruction) { i.asVecDupElement(v1VReg, operandNR(v2VReg), vecArrangementB, 1) }}, + {want: "4104064e", setup: func(i *instruction) { i.asVecDupElement(v1VReg, operandNR(v2VReg), vecArrangementH, 1) }}, + {want: "41040c4e", setup: func(i *instruction) { i.asVecDupElement(v1VReg, operandNR(v2VReg), vecArrangementS, 1) }}, + {want: "4104184e", setup: func(i *instruction) { i.asVecDupElement(v1VReg, operandNR(v2VReg), vecArrangementD, 1) }}, {want: "4138032e", setup: func(i *instruction) { - i.asVecExtract(operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B, 7) + i.asVecExtract(v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B, 7) }}, {want: "4138036e", setup: func(i *instruction) { - i.asVecExtract(operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B, 7) + i.asVecExtract(v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B, 7) }}, - {want: "410c036e", setup: func(i *instruction) { i.asVecMovElement(operandNR(v1VReg), operandNR(v2VReg), vecArrangementB, 1, 1) }}, - {want: "4114066e", setup: func(i *instruction) { i.asVecMovElement(operandNR(v1VReg), operandNR(v2VReg), vecArrangementH, 1, 1) }}, - {want: "41240c6e", setup: func(i *instruction) { i.asVecMovElement(operandNR(v1VReg), operandNR(v2VReg), vecArrangementS, 1, 1) }}, - {want: "4144186e", setup: func(i *instruction) { i.asVecMovElement(operandNR(v1VReg), operandNR(v2VReg), vecArrangementD, 1, 1) }}, + {want: "410c036e", setup: func(i *instruction) { i.asVecMovElement(v1VReg, operandNR(v2VReg), vecArrangementB, 1, 1) }}, + {want: "4114066e", setup: func(i *instruction) { i.asVecMovElement(v1VReg, operandNR(v2VReg), vecArrangementH, 1, 1) }}, + {want: "41240c6e", setup: func(i *instruction) { i.asVecMovElement(v1VReg, operandNR(v2VReg), vecArrangementS, 1, 1) }}, + {want: "4144186e", setup: func(i *instruction) { i.asVecMovElement(v1VReg, operandNR(v2VReg), vecArrangementD, 1, 1) }}, {want: "4104090f", setup: func(i *instruction) { - i.asVecShiftImm(vecOpSshr, operandNR(v1VReg), operandNR(v2VReg), operandShiftImm(7), vecArrangement8B) + i.asVecShiftImm(vecOpSshr, v1VReg, operandNR(v2VReg), operandShiftImm(7), vecArrangement8B) }}, {want: "4104094f", setup: func(i *instruction) { - i.asVecShiftImm(vecOpSshr, operandNR(v1VReg), operandNR(v2VReg), operandShiftImm(7), vecArrangement16B) + i.asVecShiftImm(vecOpSshr, v1VReg, operandNR(v2VReg), operandShiftImm(7), vecArrangement16B) }}, {want: "4104190f", setup: func(i *instruction) { - i.asVecShiftImm(vecOpSshr, operandNR(v1VReg), operandNR(v2VReg), operandShiftImm(7), vecArrangement4H) + i.asVecShiftImm(vecOpSshr, v1VReg, operandNR(v2VReg), operandShiftImm(7), vecArrangement4H) }}, {want: "4104194f", setup: func(i *instruction) { - i.asVecShiftImm(vecOpSshr, operandNR(v1VReg), operandNR(v2VReg), operandShiftImm(7), vecArrangement8H) + i.asVecShiftImm(vecOpSshr, v1VReg, operandNR(v2VReg), operandShiftImm(7), vecArrangement8H) }}, {want: "4104390f", setup: func(i *instruction) { - i.asVecShiftImm(vecOpSshr, operandNR(v1VReg), operandNR(v2VReg), operandShiftImm(7), vecArrangement2S) + 
i.asVecShiftImm(vecOpSshr, v1VReg, operandNR(v2VReg), operandShiftImm(7), vecArrangement2S) }}, {want: "4104394f", setup: func(i *instruction) { - i.asVecShiftImm(vecOpSshr, operandNR(v1VReg), operandNR(v2VReg), operandShiftImm(7), vecArrangement4S) + i.asVecShiftImm(vecOpSshr, v1VReg, operandNR(v2VReg), operandShiftImm(7), vecArrangement4S) }}, {want: "4104794f", setup: func(i *instruction) { - i.asVecShiftImm(vecOpSshr, operandNR(v1VReg), operandNR(v2VReg), operandShiftImm(7), vecArrangement2D) + i.asVecShiftImm(vecOpSshr, v1VReg, operandNR(v2VReg), operandShiftImm(7), vecArrangement2D) }}, {want: "41a40d0f", setup: func(i *instruction) { - i.asVecShiftImm(vecOpSshll, operandNR(v1VReg), operandNR(v2VReg), operandShiftImm(3), vecArrangement8B) + i.asVecShiftImm(vecOpSshll, v1VReg, operandNR(v2VReg), operandShiftImm(3), vecArrangement8B) }}, {want: "41a40d4f", setup: func(i *instruction) { // sshll2 - i.asVecShiftImm(vecOpSshll, operandNR(v1VReg), operandNR(v2VReg), operandShiftImm(3), vecArrangement16B) + i.asVecShiftImm(vecOpSshll, v1VReg, operandNR(v2VReg), operandShiftImm(3), vecArrangement16B) }}, {want: "41a41d0f", setup: func(i *instruction) { - i.asVecShiftImm(vecOpSshll, operandNR(v1VReg), operandNR(v2VReg), operandShiftImm(3), vecArrangement4H) + i.asVecShiftImm(vecOpSshll, v1VReg, operandNR(v2VReg), operandShiftImm(3), vecArrangement4H) }}, {want: "41a41d4f", setup: func(i *instruction) { // sshll2 - i.asVecShiftImm(vecOpSshll, operandNR(v1VReg), operandNR(v2VReg), operandShiftImm(3), vecArrangement8H) + i.asVecShiftImm(vecOpSshll, v1VReg, operandNR(v2VReg), operandShiftImm(3), vecArrangement8H) }}, {want: "41a43d0f", setup: func(i *instruction) { - i.asVecShiftImm(vecOpSshll, operandNR(v1VReg), operandNR(v2VReg), operandShiftImm(3), vecArrangement2S) + i.asVecShiftImm(vecOpSshll, v1VReg, operandNR(v2VReg), operandShiftImm(3), vecArrangement2S) }}, {want: "41a43d4f", setup: func(i *instruction) { // sshll2 - i.asVecShiftImm(vecOpSshll, operandNR(v1VReg), operandNR(v2VReg), operandShiftImm(3), vecArrangement4S) + i.asVecShiftImm(vecOpSshll, v1VReg, operandNR(v2VReg), operandShiftImm(3), vecArrangement4S) }}, {want: "41a40d2f", setup: func(i *instruction) { - i.asVecShiftImm(vecOpUshll, operandNR(v1VReg), operandNR(v2VReg), operandShiftImm(3), vecArrangement8B) + i.asVecShiftImm(vecOpUshll, v1VReg, operandNR(v2VReg), operandShiftImm(3), vecArrangement8B) }}, {want: "41a40d6f", setup: func(i *instruction) { // ushll2 - i.asVecShiftImm(vecOpUshll, operandNR(v1VReg), operandNR(v2VReg), operandShiftImm(3), vecArrangement16B) + i.asVecShiftImm(vecOpUshll, v1VReg, operandNR(v2VReg), operandShiftImm(3), vecArrangement16B) }}, {want: "41a41d2f", setup: func(i *instruction) { - i.asVecShiftImm(vecOpUshll, operandNR(v1VReg), operandNR(v2VReg), operandShiftImm(3), vecArrangement4H) + i.asVecShiftImm(vecOpUshll, v1VReg, operandNR(v2VReg), operandShiftImm(3), vecArrangement4H) }}, {want: "41a41d6f", setup: func(i *instruction) { // ushll2 - i.asVecShiftImm(vecOpUshll, operandNR(v1VReg), operandNR(v2VReg), operandShiftImm(3), vecArrangement8H) + i.asVecShiftImm(vecOpUshll, v1VReg, operandNR(v2VReg), operandShiftImm(3), vecArrangement8H) }}, {want: "41a43d2f", setup: func(i *instruction) { - i.asVecShiftImm(vecOpUshll, operandNR(v1VReg), operandNR(v2VReg), operandShiftImm(3), vecArrangement2S) + i.asVecShiftImm(vecOpUshll, v1VReg, operandNR(v2VReg), operandShiftImm(3), vecArrangement2S) }}, {want: "41a43d6f", setup: func(i *instruction) { // ushll2 - i.asVecShiftImm(vecOpUshll, 
operandNR(v1VReg), operandNR(v2VReg), operandShiftImm(3), vecArrangement4S) + i.asVecShiftImm(vecOpUshll, v1VReg, operandNR(v2VReg), operandShiftImm(3), vecArrangement4S) }}, {want: "4100030e", setup: func(i *instruction) { - i.asVecTbl(1, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) + i.asVecTbl(1, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) }}, {want: "4100034e", setup: func(i *instruction) { - i.asVecTbl(1, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) + i.asVecTbl(1, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) }}, {want: "4120040e", setup: func(i *instruction) { - i.asVecTbl(2, operandNR(v1VReg), operandNR(v2VReg), operandNR(v4VReg), vecArrangement8B) + i.asVecTbl(2, v1VReg, operandNR(v2VReg), operandNR(v4VReg), vecArrangement8B) }}, {want: "4120044e", setup: func(i *instruction) { - i.asVecTbl(2, operandNR(v1VReg), operandNR(v2VReg), operandNR(v4VReg), vecArrangement16B) + i.asVecTbl(2, v1VReg, operandNR(v2VReg), operandNR(v4VReg), vecArrangement16B) }}, {want: "4138030e", setup: func(i *instruction) { - i.asVecPermute(vecOpZip1, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) + i.asVecPermute(vecOpZip1, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) }}, {want: "4138034e", setup: func(i *instruction) { - i.asVecPermute(vecOpZip1, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) + i.asVecPermute(vecOpZip1, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) }}, {want: "4138430e", setup: func(i *instruction) { - i.asVecPermute(vecOpZip1, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) + i.asVecPermute(vecOpZip1, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) }}, {want: "4138434e", setup: func(i *instruction) { - i.asVecPermute(vecOpZip1, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) + i.asVecPermute(vecOpZip1, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) }}, {want: "4138830e", setup: func(i *instruction) { - i.asVecPermute(vecOpZip1, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecPermute(vecOpZip1, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, {want: "4138834e", setup: func(i *instruction) { - i.asVecPermute(vecOpZip1, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) + i.asVecPermute(vecOpZip1, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) }}, {want: "4138c34e", setup: func(i *instruction) { - i.asVecPermute(vecOpZip1, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) + i.asVecPermute(vecOpZip1, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) }}, {want: "411ca32e", setup: func(i *instruction) { - i.asVecRRRRewrite(vecOpBit, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) + i.asVecRRRRewrite(vecOpBit, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) }}, {want: "411ca36e", setup: func(i *instruction) { - i.asVecRRRRewrite(vecOpBit, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) + i.asVecRRRRewrite(vecOpBit, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) }}, {want: "411c236e", setup: func(i *instruction) { - i.asVecRRR(vecOpEOR, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) + i.asVecRRR(vecOpEOR, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) 
}}, {want: "411c232e", setup: func(i *instruction) { - i.asVecRRR(vecOpEOR, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) + i.asVecRRR(vecOpEOR, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) }}, {want: "4184234e", setup: func(i *instruction) { - i.asVecRRR(vecOpAdd, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) + i.asVecRRR(vecOpAdd, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) }}, {want: "4184a34e", setup: func(i *instruction) { - i.asVecRRR(vecOpAdd, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) + i.asVecRRR(vecOpAdd, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) }}, {want: "4184e34e", setup: func(i *instruction) { - i.asVecRRR(vecOpAdd, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) + i.asVecRRR(vecOpAdd, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) }}, {want: "410c230e", setup: func(i *instruction) { - i.asVecRRR(vecOpSqadd, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) + i.asVecRRR(vecOpSqadd, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) }}, {want: "410c234e", setup: func(i *instruction) { - i.asVecRRR(vecOpSqadd, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) + i.asVecRRR(vecOpSqadd, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) }}, {want: "410c630e", setup: func(i *instruction) { - i.asVecRRR(vecOpSqadd, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) + i.asVecRRR(vecOpSqadd, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) }}, {want: "410c634e", setup: func(i *instruction) { - i.asVecRRR(vecOpSqadd, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) + i.asVecRRR(vecOpSqadd, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) }}, {want: "410ca30e", setup: func(i *instruction) { - i.asVecRRR(vecOpSqadd, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpSqadd, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, {want: "410ca34e", setup: func(i *instruction) { - i.asVecRRR(vecOpSqadd, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) + i.asVecRRR(vecOpSqadd, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) }}, {want: "410ce34e", setup: func(i *instruction) { - i.asVecRRR(vecOpSqadd, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) + i.asVecRRR(vecOpSqadd, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) }}, {want: "410c232e", setup: func(i *instruction) { - i.asVecRRR(vecOpUqadd, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) + i.asVecRRR(vecOpUqadd, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) }}, {want: "410c236e", setup: func(i *instruction) { - i.asVecRRR(vecOpUqadd, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) + i.asVecRRR(vecOpUqadd, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) }}, {want: "410c632e", setup: func(i *instruction) { - i.asVecRRR(vecOpUqadd, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) + i.asVecRRR(vecOpUqadd, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) }}, {want: "410c636e", setup: func(i *instruction) { - i.asVecRRR(vecOpUqadd, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) + 
i.asVecRRR(vecOpUqadd, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) }}, {want: "410ca32e", setup: func(i *instruction) { - i.asVecRRR(vecOpUqadd, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpUqadd, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, {want: "410ca36e", setup: func(i *instruction) { - i.asVecRRR(vecOpUqadd, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) + i.asVecRRR(vecOpUqadd, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) }}, {want: "410ce36e", setup: func(i *instruction) { - i.asVecRRR(vecOpUqadd, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) + i.asVecRRR(vecOpUqadd, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) }}, {want: "412c230e", setup: func(i *instruction) { - i.asVecRRR(vecOpSqsub, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) + i.asVecRRR(vecOpSqsub, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) }}, {want: "412c234e", setup: func(i *instruction) { - i.asVecRRR(vecOpSqsub, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) + i.asVecRRR(vecOpSqsub, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) }}, {want: "412c630e", setup: func(i *instruction) { - i.asVecRRR(vecOpSqsub, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) + i.asVecRRR(vecOpSqsub, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) }}, {want: "412c634e", setup: func(i *instruction) { - i.asVecRRR(vecOpSqsub, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) + i.asVecRRR(vecOpSqsub, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) }}, {want: "412ca30e", setup: func(i *instruction) { - i.asVecRRR(vecOpSqsub, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpSqsub, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, {want: "412ca34e", setup: func(i *instruction) { - i.asVecRRR(vecOpSqsub, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) + i.asVecRRR(vecOpSqsub, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) }}, {want: "412ce34e", setup: func(i *instruction) { - i.asVecRRR(vecOpSqsub, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) + i.asVecRRR(vecOpSqsub, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) }}, {want: "412c232e", setup: func(i *instruction) { - i.asVecRRR(vecOpUqsub, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) + i.asVecRRR(vecOpUqsub, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) }}, {want: "412c236e", setup: func(i *instruction) { - i.asVecRRR(vecOpUqsub, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) + i.asVecRRR(vecOpUqsub, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) }}, {want: "412c632e", setup: func(i *instruction) { - i.asVecRRR(vecOpUqsub, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) + i.asVecRRR(vecOpUqsub, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) }}, {want: "412c636e", setup: func(i *instruction) { - i.asVecRRR(vecOpUqsub, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) + i.asVecRRR(vecOpUqsub, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) }}, {want: "412ca32e", setup: func(i *instruction) { - 
i.asVecRRR(vecOpUqsub, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpUqsub, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, {want: "412ca36e", setup: func(i *instruction) { - i.asVecRRR(vecOpUqsub, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) + i.asVecRRR(vecOpUqsub, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) }}, {want: "412ce36e", setup: func(i *instruction) { - i.asVecRRR(vecOpUqsub, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) + i.asVecRRR(vecOpUqsub, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) }}, {want: "4184232e", setup: func(i *instruction) { - i.asVecRRR(vecOpSub, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) + i.asVecRRR(vecOpSub, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) }}, {want: "4184236e", setup: func(i *instruction) { - i.asVecRRR(vecOpSub, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) + i.asVecRRR(vecOpSub, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) }}, {want: "4184632e", setup: func(i *instruction) { - i.asVecRRR(vecOpSub, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) + i.asVecRRR(vecOpSub, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) }}, {want: "4184636e", setup: func(i *instruction) { - i.asVecRRR(vecOpSub, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) + i.asVecRRR(vecOpSub, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) }}, {want: "4184a32e", setup: func(i *instruction) { - i.asVecRRR(vecOpSub, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpSub, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, {want: "4184a36e", setup: func(i *instruction) { - i.asVecRRR(vecOpSub, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) + i.asVecRRR(vecOpSub, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) }}, {want: "4184e36e", setup: func(i *instruction) { - i.asVecRRR(vecOpSub, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) + i.asVecRRR(vecOpSub, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) }}, {want: "41bc230e", setup: func(i *instruction) { - i.asVecRRR(vecOpAddp, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) + i.asVecRRR(vecOpAddp, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) }}, {want: "41bc234e", setup: func(i *instruction) { - i.asVecRRR(vecOpAddp, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) + i.asVecRRR(vecOpAddp, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) }}, {want: "41bc630e", setup: func(i *instruction) { - i.asVecRRR(vecOpAddp, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) + i.asVecRRR(vecOpAddp, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) }}, {want: "41bc634e", setup: func(i *instruction) { - i.asVecRRR(vecOpAddp, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) + i.asVecRRR(vecOpAddp, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) }}, {want: "41bca30e", setup: func(i *instruction) { - i.asVecRRR(vecOpAddp, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpAddp, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, {want: 
"41bca34e", setup: func(i *instruction) { - i.asVecRRR(vecOpAddp, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) + i.asVecRRR(vecOpAddp, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) }}, {want: "41bce34e", setup: func(i *instruction) { - i.asVecRRR(vecOpAddp, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) + i.asVecRRR(vecOpAddp, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) }}, {want: "41bc230e", setup: func(i *instruction) { - i.asVecRRR(vecOpAddp, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) + i.asVecRRR(vecOpAddp, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) }}, {want: "41b8314e", setup: func(i *instruction) { - i.asVecLanes(vecOpAddv, operandNR(v1VReg), operandNR(v2VReg), vecArrangement16B) + i.asVecLanes(vecOpAddv, v1VReg, operandNR(v2VReg), vecArrangement16B) }}, {want: "41b8710e", setup: func(i *instruction) { - i.asVecLanes(vecOpAddv, operandNR(v1VReg), operandNR(v2VReg), vecArrangement4H) + i.asVecLanes(vecOpAddv, v1VReg, operandNR(v2VReg), vecArrangement4H) }}, {want: "41b8714e", setup: func(i *instruction) { - i.asVecLanes(vecOpAddv, operandNR(v1VReg), operandNR(v2VReg), vecArrangement8H) + i.asVecLanes(vecOpAddv, v1VReg, operandNR(v2VReg), vecArrangement8H) }}, {want: "41b8b14e", setup: func(i *instruction) { - i.asVecLanes(vecOpAddv, operandNR(v1VReg), operandNR(v2VReg), vecArrangement4S) + i.asVecLanes(vecOpAddv, v1VReg, operandNR(v2VReg), vecArrangement4S) }}, {want: "416c230e", setup: func(i *instruction) { - i.asVecRRR(vecOpSmin, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) + i.asVecRRR(vecOpSmin, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) }}, {want: "416c234e", setup: func(i *instruction) { - i.asVecRRR(vecOpSmin, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) + i.asVecRRR(vecOpSmin, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) }}, {want: "416c630e", setup: func(i *instruction) { - i.asVecRRR(vecOpSmin, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) + i.asVecRRR(vecOpSmin, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) }}, {want: "416c634e", setup: func(i *instruction) { - i.asVecRRR(vecOpSmin, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) + i.asVecRRR(vecOpSmin, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) }}, {want: "416ca30e", setup: func(i *instruction) { - i.asVecRRR(vecOpSmin, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpSmin, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, {want: "416ca34e", setup: func(i *instruction) { - i.asVecRRR(vecOpSmin, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) + i.asVecRRR(vecOpSmin, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) }}, {want: "416c232e", setup: func(i *instruction) { - i.asVecRRR(vecOpUmin, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) + i.asVecRRR(vecOpUmin, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) }}, {want: "416c236e", setup: func(i *instruction) { - i.asVecRRR(vecOpUmin, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) + i.asVecRRR(vecOpUmin, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) }}, {want: "416c632e", setup: func(i *instruction) { - i.asVecRRR(vecOpUmin, operandNR(v1VReg), 
operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) + i.asVecRRR(vecOpUmin, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) }}, {want: "416c636e", setup: func(i *instruction) { - i.asVecRRR(vecOpUmin, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) + i.asVecRRR(vecOpUmin, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) }}, {want: "416ca32e", setup: func(i *instruction) { - i.asVecRRR(vecOpUmin, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpUmin, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, {want: "416ca36e", setup: func(i *instruction) { - i.asVecRRR(vecOpUmin, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) + i.asVecRRR(vecOpUmin, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) }}, {want: "4164230e", setup: func(i *instruction) { - i.asVecRRR(vecOpSmax, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) + i.asVecRRR(vecOpSmax, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) }}, {want: "4164234e", setup: func(i *instruction) { - i.asVecRRR(vecOpSmax, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) + i.asVecRRR(vecOpSmax, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) }}, {want: "4164630e", setup: func(i *instruction) { - i.asVecRRR(vecOpSmax, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) + i.asVecRRR(vecOpSmax, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) }}, {want: "4164634e", setup: func(i *instruction) { - i.asVecRRR(vecOpSmax, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) + i.asVecRRR(vecOpSmax, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) }}, {want: "4164a30e", setup: func(i *instruction) { - i.asVecRRR(vecOpSmax, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpSmax, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, {want: "4164a34e", setup: func(i *instruction) { - i.asVecRRR(vecOpSmax, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) + i.asVecRRR(vecOpSmax, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) }}, {want: "4164232e", setup: func(i *instruction) { - i.asVecRRR(vecOpUmax, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) + i.asVecRRR(vecOpUmax, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) }}, {want: "4164236e", setup: func(i *instruction) { - i.asVecRRR(vecOpUmax, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) + i.asVecRRR(vecOpUmax, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) }}, {want: "4164632e", setup: func(i *instruction) { - i.asVecRRR(vecOpUmax, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) + i.asVecRRR(vecOpUmax, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) }}, {want: "4164636e", setup: func(i *instruction) { - i.asVecRRR(vecOpUmax, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) + i.asVecRRR(vecOpUmax, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) }}, {want: "4164a32e", setup: func(i *instruction) { - i.asVecRRR(vecOpUmax, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpUmax, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, {want: "4164a36e", setup: func(i 
*instruction) { - i.asVecRRR(vecOpUmax, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) + i.asVecRRR(vecOpUmax, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) }}, {want: "41a4232e", setup: func(i *instruction) { - i.asVecRRR(vecOpUmaxp, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) + i.asVecRRR(vecOpUmaxp, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) }}, {want: "41a4236e", setup: func(i *instruction) { - i.asVecRRR(vecOpUmaxp, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) + i.asVecRRR(vecOpUmaxp, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) }}, {want: "41a4632e", setup: func(i *instruction) { - i.asVecRRR(vecOpUmaxp, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) + i.asVecRRR(vecOpUmaxp, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) }}, {want: "41a4636e", setup: func(i *instruction) { - i.asVecRRR(vecOpUmaxp, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) + i.asVecRRR(vecOpUmaxp, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) }}, {want: "41a4a32e", setup: func(i *instruction) { - i.asVecRRR(vecOpUmaxp, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpUmaxp, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, {want: "41a8312e", setup: func(i *instruction) { - i.asVecLanes(vecOpUminv, operandNR(v1VReg), operandNR(v2VReg), vecArrangement8B) + i.asVecLanes(vecOpUminv, v1VReg, operandNR(v2VReg), vecArrangement8B) }}, {want: "41a8316e", setup: func(i *instruction) { - i.asVecLanes(vecOpUminv, operandNR(v1VReg), operandNR(v2VReg), vecArrangement16B) + i.asVecLanes(vecOpUminv, v1VReg, operandNR(v2VReg), vecArrangement16B) }}, {want: "41a8712e", setup: func(i *instruction) { - i.asVecLanes(vecOpUminv, operandNR(v1VReg), operandNR(v2VReg), vecArrangement4H) + i.asVecLanes(vecOpUminv, v1VReg, operandNR(v2VReg), vecArrangement4H) }}, {want: "41a8716e", setup: func(i *instruction) { - i.asVecLanes(vecOpUminv, operandNR(v1VReg), operandNR(v2VReg), vecArrangement8H) + i.asVecLanes(vecOpUminv, v1VReg, operandNR(v2VReg), vecArrangement8H) }}, {want: "41a8b16e", setup: func(i *instruction) { - i.asVecLanes(vecOpUminv, operandNR(v1VReg), operandNR(v2VReg), vecArrangement4S) + i.asVecLanes(vecOpUminv, v1VReg, operandNR(v2VReg), vecArrangement4S) }}, {want: "4114232e", setup: func(i *instruction) { - i.asVecRRR(vecOpUrhadd, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) + i.asVecRRR(vecOpUrhadd, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) }}, {want: "4114236e", setup: func(i *instruction) { - i.asVecRRR(vecOpUrhadd, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) + i.asVecRRR(vecOpUrhadd, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) }}, {want: "4114632e", setup: func(i *instruction) { - i.asVecRRR(vecOpUrhadd, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) + i.asVecRRR(vecOpUrhadd, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) }}, {want: "4114636e", setup: func(i *instruction) { - i.asVecRRR(vecOpUrhadd, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) + i.asVecRRR(vecOpUrhadd, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) }}, {want: "4114a32e", setup: func(i *instruction) { - i.asVecRRR(vecOpUrhadd, operandNR(v1VReg), operandNR(v2VReg), 
operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpUrhadd, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, {want: "4114a36e", setup: func(i *instruction) { - i.asVecRRR(vecOpUrhadd, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) + i.asVecRRR(vecOpUrhadd, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) }}, {want: "419c230e", setup: func(i *instruction) { - i.asVecRRR(vecOpMul, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) + i.asVecRRR(vecOpMul, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) }}, {want: "419c234e", setup: func(i *instruction) { - i.asVecRRR(vecOpMul, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) + i.asVecRRR(vecOpMul, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) }}, {want: "419c630e", setup: func(i *instruction) { - i.asVecRRR(vecOpMul, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) + i.asVecRRR(vecOpMul, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) }}, {want: "419c634e", setup: func(i *instruction) { - i.asVecRRR(vecOpMul, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) + i.asVecRRR(vecOpMul, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) }}, {want: "419ca30e", setup: func(i *instruction) { - i.asVecRRR(vecOpMul, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpMul, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, {want: "419ca34e", setup: func(i *instruction) { - i.asVecRRR(vecOpMul, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) + i.asVecRRR(vecOpMul, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) }}, {want: "4198200e", setup: func(i *instruction) { - i.asVecMisc(vecOpCmeq0, operandNR(v1VReg), operandNR(v2VReg), vecArrangement8B) + i.asVecMisc(vecOpCmeq0, v1VReg, operandNR(v2VReg), vecArrangement8B) }}, {want: "4198204e", setup: func(i *instruction) { - i.asVecMisc(vecOpCmeq0, operandNR(v1VReg), operandNR(v2VReg), vecArrangement16B) + i.asVecMisc(vecOpCmeq0, v1VReg, operandNR(v2VReg), vecArrangement16B) }}, {want: "4198600e", setup: func(i *instruction) { - i.asVecMisc(vecOpCmeq0, operandNR(v1VReg), operandNR(v2VReg), vecArrangement4H) + i.asVecMisc(vecOpCmeq0, v1VReg, operandNR(v2VReg), vecArrangement4H) }}, {want: "4198604e", setup: func(i *instruction) { - i.asVecMisc(vecOpCmeq0, operandNR(v1VReg), operandNR(v2VReg), vecArrangement8H) + i.asVecMisc(vecOpCmeq0, v1VReg, operandNR(v2VReg), vecArrangement8H) }}, {want: "4198a00e", setup: func(i *instruction) { - i.asVecMisc(vecOpCmeq0, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2S) + i.asVecMisc(vecOpCmeq0, v1VReg, operandNR(v2VReg), vecArrangement2S) }}, {want: "4198a04e", setup: func(i *instruction) { - i.asVecMisc(vecOpCmeq0, operandNR(v1VReg), operandNR(v2VReg), vecArrangement4S) + i.asVecMisc(vecOpCmeq0, v1VReg, operandNR(v2VReg), vecArrangement4S) }}, {want: "4198e04e", setup: func(i *instruction) { - i.asVecMisc(vecOpCmeq0, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2D) + i.asVecMisc(vecOpCmeq0, v1VReg, operandNR(v2VReg), vecArrangement2D) }}, {want: "418c232e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmeq, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) + i.asVecRRR(vecOpCmeq, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) }}, {want: "418c236e", setup: func(i *instruction) { - 
i.asVecRRR(vecOpCmeq, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) + i.asVecRRR(vecOpCmeq, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) }}, {want: "418c632e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmeq, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) + i.asVecRRR(vecOpCmeq, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) }}, {want: "418c636e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmeq, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) + i.asVecRRR(vecOpCmeq, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) }}, {want: "418ca32e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmeq, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpCmeq, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, {want: "418ca36e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmeq, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) + i.asVecRRR(vecOpCmeq, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) }}, {want: "418ce36e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmeq, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) + i.asVecRRR(vecOpCmeq, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) }}, {want: "4134230e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmgt, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) + i.asVecRRR(vecOpCmgt, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) }}, {want: "4134234e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmgt, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) + i.asVecRRR(vecOpCmgt, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) }}, {want: "4134630e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmgt, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) + i.asVecRRR(vecOpCmgt, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) }}, {want: "4134634e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmgt, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) + i.asVecRRR(vecOpCmgt, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) }}, {want: "4134a30e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmgt, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpCmgt, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, {want: "4134a34e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmgt, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) + i.asVecRRR(vecOpCmgt, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) }}, {want: "4134e34e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmgt, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) + i.asVecRRR(vecOpCmgt, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) }}, {want: "4134232e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmhi, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) + i.asVecRRR(vecOpCmhi, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) }}, {want: "4134236e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmhi, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) + i.asVecRRR(vecOpCmhi, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) }}, 
{want: "4134632e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmhi, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) + i.asVecRRR(vecOpCmhi, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) }}, {want: "4134636e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmhi, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) + i.asVecRRR(vecOpCmhi, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) }}, {want: "4134a32e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmhi, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpCmhi, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, {want: "4134a36e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmhi, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) + i.asVecRRR(vecOpCmhi, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) }}, {want: "4134e36e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmhi, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) + i.asVecRRR(vecOpCmhi, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) }}, {want: "413c230e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmge, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) + i.asVecRRR(vecOpCmge, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) }}, {want: "413c234e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmge, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) + i.asVecRRR(vecOpCmge, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) }}, {want: "413c630e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmge, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) + i.asVecRRR(vecOpCmge, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) }}, {want: "413c634e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmge, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) + i.asVecRRR(vecOpCmge, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) }}, {want: "413ca30e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmge, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpCmge, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, {want: "413ca34e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmge, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) + i.asVecRRR(vecOpCmge, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) }}, {want: "413ce34e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmge, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) + i.asVecRRR(vecOpCmge, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) }}, {want: "4134230e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmgt, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) + i.asVecRRR(vecOpCmgt, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) }}, {want: "4134234e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmgt, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) + i.asVecRRR(vecOpCmgt, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) }}, {want: "4134630e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmgt, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) + i.asVecRRR(vecOpCmgt, v1VReg, 
operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) }}, {want: "4134634e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmgt, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) + i.asVecRRR(vecOpCmgt, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) }}, {want: "4134a30e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmgt, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpCmgt, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, {want: "4134a34e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmgt, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) + i.asVecRRR(vecOpCmgt, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) }}, {want: "4134e34e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmgt, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) + i.asVecRRR(vecOpCmgt, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) }}, {want: "4134232e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmhi, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) + i.asVecRRR(vecOpCmhi, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) }}, {want: "4134236e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmhi, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) + i.asVecRRR(vecOpCmhi, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) }}, {want: "4134632e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmhi, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) + i.asVecRRR(vecOpCmhi, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) }}, {want: "4134636e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmhi, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) + i.asVecRRR(vecOpCmhi, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) }}, {want: "4134a32e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmhi, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpCmhi, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, {want: "4134a36e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmhi, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) + i.asVecRRR(vecOpCmhi, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) }}, {want: "4134e36e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmhi, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) + i.asVecRRR(vecOpCmhi, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) }}, {want: "413c232e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmhs, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) + i.asVecRRR(vecOpCmhs, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) }}, {want: "413c236e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmhs, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) + i.asVecRRR(vecOpCmhs, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) }}, {want: "413c632e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmhs, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) + i.asVecRRR(vecOpCmhs, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) }}, {want: "413c636e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmhs, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), 
vecArrangement8H) + i.asVecRRR(vecOpCmhs, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) }}, {want: "413ca32e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmhs, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpCmhs, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, {want: "413ca36e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmhs, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) + i.asVecRRR(vecOpCmhs, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) }}, {want: "413ce36e", setup: func(i *instruction) { - i.asVecRRR(vecOpCmhs, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) + i.asVecRRR(vecOpCmhs, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) }}, {want: "41f4230e", setup: func(i *instruction) { - i.asVecRRR(vecOpFmax, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpFmax, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, {want: "41f4234e", setup: func(i *instruction) { - i.asVecRRR(vecOpFmax, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) + i.asVecRRR(vecOpFmax, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) }}, {want: "41f4634e", setup: func(i *instruction) { - i.asVecRRR(vecOpFmax, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) + i.asVecRRR(vecOpFmax, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) }}, {want: "41f4a30e", setup: func(i *instruction) { - i.asVecRRR(vecOpFmin, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpFmin, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, {want: "41f4a34e", setup: func(i *instruction) { - i.asVecRRR(vecOpFmin, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) + i.asVecRRR(vecOpFmin, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) }}, {want: "41f4e34e", setup: func(i *instruction) { - i.asVecRRR(vecOpFmin, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) + i.asVecRRR(vecOpFmin, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) }}, {want: "41d4230e", setup: func(i *instruction) { - i.asVecRRR(vecOpFadd, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpFadd, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, {want: "41d4234e", setup: func(i *instruction) { - i.asVecRRR(vecOpFadd, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) + i.asVecRRR(vecOpFadd, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) }}, {want: "41d4634e", setup: func(i *instruction) { - i.asVecRRR(vecOpFadd, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) + i.asVecRRR(vecOpFadd, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) }}, {want: "41d4a30e", setup: func(i *instruction) { - i.asVecRRR(vecOpFsub, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpFsub, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, {want: "41d4a34e", setup: func(i *instruction) { - i.asVecRRR(vecOpFsub, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) + i.asVecRRR(vecOpFsub, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) }}, {want: "41d4e34e", setup: func(i *instruction) { - i.asVecRRR(vecOpFsub, 
operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) + i.asVecRRR(vecOpFsub, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) }}, {want: "41dc232e", setup: func(i *instruction) { - i.asVecRRR(vecOpFmul, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpFmul, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, {want: "41dc236e", setup: func(i *instruction) { - i.asVecRRR(vecOpFmul, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) + i.asVecRRR(vecOpFmul, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) }}, {want: "41dc636e", setup: func(i *instruction) { - i.asVecRRR(vecOpFmul, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) + i.asVecRRR(vecOpFmul, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) }}, {want: "41b4636e", setup: func(i *instruction) { - i.asVecRRR(vecOpSqrdmulh, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) + i.asVecRRR(vecOpSqrdmulh, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) }}, {want: "41b4632e", setup: func(i *instruction) { - i.asVecRRR(vecOpSqrdmulh, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) + i.asVecRRR(vecOpSqrdmulh, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) }}, {want: "41b4a32e", setup: func(i *instruction) { - i.asVecRRR(vecOpSqrdmulh, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpSqrdmulh, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, {want: "41b4a36e", setup: func(i *instruction) { - i.asVecRRR(vecOpSqrdmulh, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) + i.asVecRRR(vecOpSqrdmulh, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) }}, {want: "41fc232e", setup: func(i *instruction) { - i.asVecRRR(vecOpFdiv, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpFdiv, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, {want: "41fc236e", setup: func(i *instruction) { - i.asVecRRR(vecOpFdiv, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) + i.asVecRRR(vecOpFdiv, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) }}, {want: "41fc636e", setup: func(i *instruction) { - i.asVecRRR(vecOpFdiv, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) + i.asVecRRR(vecOpFdiv, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) }}, {want: "41e4230e", setup: func(i *instruction) { - i.asVecRRR(vecOpFcmeq, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpFcmeq, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, {want: "41e4234e", setup: func(i *instruction) { - i.asVecRRR(vecOpFcmeq, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) + i.asVecRRR(vecOpFcmeq, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) }}, {want: "41e4634e", setup: func(i *instruction) { - i.asVecRRR(vecOpFcmeq, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) + i.asVecRRR(vecOpFcmeq, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) }}, {want: "41e4a32e", setup: func(i *instruction) { - i.asVecRRR(vecOpFcmgt, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpFcmgt, v1VReg, operandNR(v2VReg), operandNR(v3VReg), 
vecArrangement2S) }}, {want: "41e4a36e", setup: func(i *instruction) { - i.asVecRRR(vecOpFcmgt, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) + i.asVecRRR(vecOpFcmgt, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) }}, {want: "41e4e36e", setup: func(i *instruction) { - i.asVecRRR(vecOpFcmgt, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) + i.asVecRRR(vecOpFcmgt, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) }}, {want: "41e4232e", setup: func(i *instruction) { - i.asVecRRR(vecOpFcmge, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpFcmge, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, {want: "41e4236e", setup: func(i *instruction) { - i.asVecRRR(vecOpFcmge, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) + i.asVecRRR(vecOpFcmge, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) }}, {want: "41e4636e", setup: func(i *instruction) { - i.asVecRRR(vecOpFcmge, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) + i.asVecRRR(vecOpFcmge, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) }}, {want: "4198210e", setup: func(i *instruction) { - i.asVecMisc(vecOpFrintm, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2S) + i.asVecMisc(vecOpFrintm, v1VReg, operandNR(v2VReg), vecArrangement2S) }}, {want: "4198214e", setup: func(i *instruction) { - i.asVecMisc(vecOpFrintm, operandNR(v1VReg), operandNR(v2VReg), vecArrangement4S) + i.asVecMisc(vecOpFrintm, v1VReg, operandNR(v2VReg), vecArrangement4S) }}, {want: "4198614e", setup: func(i *instruction) { - i.asVecMisc(vecOpFrintm, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2D) + i.asVecMisc(vecOpFrintm, v1VReg, operandNR(v2VReg), vecArrangement2D) }}, {want: "4188210e", setup: func(i *instruction) { - i.asVecMisc(vecOpFrintn, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2S) + i.asVecMisc(vecOpFrintn, v1VReg, operandNR(v2VReg), vecArrangement2S) }}, {want: "4188214e", setup: func(i *instruction) { - i.asVecMisc(vecOpFrintn, operandNR(v1VReg), operandNR(v2VReg), vecArrangement4S) + i.asVecMisc(vecOpFrintn, v1VReg, operandNR(v2VReg), vecArrangement4S) }}, {want: "4188614e", setup: func(i *instruction) { - i.asVecMisc(vecOpFrintn, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2D) + i.asVecMisc(vecOpFrintn, v1VReg, operandNR(v2VReg), vecArrangement2D) }}, {want: "4188a10e", setup: func(i *instruction) { - i.asVecMisc(vecOpFrintp, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2S) + i.asVecMisc(vecOpFrintp, v1VReg, operandNR(v2VReg), vecArrangement2S) }}, {want: "4188a14e", setup: func(i *instruction) { - i.asVecMisc(vecOpFrintp, operandNR(v1VReg), operandNR(v2VReg), vecArrangement4S) + i.asVecMisc(vecOpFrintp, v1VReg, operandNR(v2VReg), vecArrangement4S) }}, {want: "4188e14e", setup: func(i *instruction) { - i.asVecMisc(vecOpFrintp, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2D) + i.asVecMisc(vecOpFrintp, v1VReg, operandNR(v2VReg), vecArrangement2D) }}, {want: "4198a10e", setup: func(i *instruction) { - i.asVecMisc(vecOpFrintz, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2S) + i.asVecMisc(vecOpFrintz, v1VReg, operandNR(v2VReg), vecArrangement2S) }}, {want: "4198a14e", setup: func(i *instruction) { - i.asVecMisc(vecOpFrintz, operandNR(v1VReg), operandNR(v2VReg), vecArrangement4S) + i.asVecMisc(vecOpFrintz, v1VReg, operandNR(v2VReg), vecArrangement4S) }}, {want: "4198e14e", setup: func(i 
*instruction) { - i.asVecMisc(vecOpFrintz, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2D) + i.asVecMisc(vecOpFrintz, v1VReg, operandNR(v2VReg), vecArrangement2D) }}, {want: "4178610e", setup: func(i *instruction) { - i.asVecMisc(vecOpFcvtl, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2S) + i.asVecMisc(vecOpFcvtl, v1VReg, operandNR(v2VReg), vecArrangement2S) }}, {want: "4178210e", setup: func(i *instruction) { - i.asVecMisc(vecOpFcvtl, operandNR(v1VReg), operandNR(v2VReg), vecArrangement4H) + i.asVecMisc(vecOpFcvtl, v1VReg, operandNR(v2VReg), vecArrangement4H) }}, {want: "4168610e", setup: func(i *instruction) { - i.asVecMisc(vecOpFcvtn, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2S) + i.asVecMisc(vecOpFcvtn, v1VReg, operandNR(v2VReg), vecArrangement2S) }}, {want: "4168210e", setup: func(i *instruction) { - i.asVecMisc(vecOpFcvtn, operandNR(v1VReg), operandNR(v2VReg), vecArrangement4H) + i.asVecMisc(vecOpFcvtn, v1VReg, operandNR(v2VReg), vecArrangement4H) }}, {want: "41b8a10e", setup: func(i *instruction) { - i.asVecMisc(vecOpFcvtzs, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2S) + i.asVecMisc(vecOpFcvtzs, v1VReg, operandNR(v2VReg), vecArrangement2S) }}, {want: "41b8a14e", setup: func(i *instruction) { - i.asVecMisc(vecOpFcvtzs, operandNR(v1VReg), operandNR(v2VReg), vecArrangement4S) + i.asVecMisc(vecOpFcvtzs, v1VReg, operandNR(v2VReg), vecArrangement4S) }}, {want: "41b8e14e", setup: func(i *instruction) { - i.asVecMisc(vecOpFcvtzs, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2D) + i.asVecMisc(vecOpFcvtzs, v1VReg, operandNR(v2VReg), vecArrangement2D) }}, {want: "41b8a12e", setup: func(i *instruction) { - i.asVecMisc(vecOpFcvtzu, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2S) + i.asVecMisc(vecOpFcvtzu, v1VReg, operandNR(v2VReg), vecArrangement2S) }}, {want: "41b8a16e", setup: func(i *instruction) { - i.asVecMisc(vecOpFcvtzu, operandNR(v1VReg), operandNR(v2VReg), vecArrangement4S) + i.asVecMisc(vecOpFcvtzu, v1VReg, operandNR(v2VReg), vecArrangement4S) }}, {want: "41b8e16e", setup: func(i *instruction) { - i.asVecMisc(vecOpFcvtzu, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2D) + i.asVecMisc(vecOpFcvtzu, v1VReg, operandNR(v2VReg), vecArrangement2D) }}, {want: "41d8210e", setup: func(i *instruction) { - i.asVecMisc(vecOpScvtf, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2S) + i.asVecMisc(vecOpScvtf, v1VReg, operandNR(v2VReg), vecArrangement2S) }}, {want: "41d8214e", setup: func(i *instruction) { - i.asVecMisc(vecOpScvtf, operandNR(v1VReg), operandNR(v2VReg), vecArrangement4S) + i.asVecMisc(vecOpScvtf, v1VReg, operandNR(v2VReg), vecArrangement4S) }}, {want: "41d8614e", setup: func(i *instruction) { - i.asVecMisc(vecOpScvtf, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2D) + i.asVecMisc(vecOpScvtf, v1VReg, operandNR(v2VReg), vecArrangement2D) }}, {want: "41d8212e", setup: func(i *instruction) { - i.asVecMisc(vecOpUcvtf, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2S) + i.asVecMisc(vecOpUcvtf, v1VReg, operandNR(v2VReg), vecArrangement2S) }}, {want: "41d8216e", setup: func(i *instruction) { - i.asVecMisc(vecOpUcvtf, operandNR(v1VReg), operandNR(v2VReg), vecArrangement4S) + i.asVecMisc(vecOpUcvtf, v1VReg, operandNR(v2VReg), vecArrangement4S) }}, {want: "41d8616e", setup: func(i *instruction) { - i.asVecMisc(vecOpUcvtf, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2D) + i.asVecMisc(vecOpUcvtf, v1VReg, operandNR(v2VReg), vecArrangement2D) }}, {want: "4148210e", setup: func(i *instruction) { - 
i.asVecMisc(vecOpSqxtn, operandNR(v1VReg), operandNR(v2VReg), vecArrangement8B) + i.asVecMisc(vecOpSqxtn, v1VReg, operandNR(v2VReg), vecArrangement8B) }}, {want: "4148214e", setup: func(i *instruction) { // sqxtn2 - i.asVecMisc(vecOpSqxtn, operandNR(v1VReg), operandNR(v2VReg), vecArrangement16B) + i.asVecMisc(vecOpSqxtn, v1VReg, operandNR(v2VReg), vecArrangement16B) }}, {want: "4148610e", setup: func(i *instruction) { - i.asVecMisc(vecOpSqxtn, operandNR(v1VReg), operandNR(v2VReg), vecArrangement4H) + i.asVecMisc(vecOpSqxtn, v1VReg, operandNR(v2VReg), vecArrangement4H) }}, {want: "4148614e", setup: func(i *instruction) { // sqxtn2 - i.asVecMisc(vecOpSqxtn, operandNR(v1VReg), operandNR(v2VReg), vecArrangement8H) + i.asVecMisc(vecOpSqxtn, v1VReg, operandNR(v2VReg), vecArrangement8H) }}, {want: "4148a10e", setup: func(i *instruction) { - i.asVecMisc(vecOpSqxtn, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2S) + i.asVecMisc(vecOpSqxtn, v1VReg, operandNR(v2VReg), vecArrangement2S) }}, {want: "4148a14e", setup: func(i *instruction) { // sqxtun2 - i.asVecMisc(vecOpSqxtn, operandNR(v1VReg), operandNR(v2VReg), vecArrangement4S) + i.asVecMisc(vecOpSqxtn, v1VReg, operandNR(v2VReg), vecArrangement4S) }}, {want: "4128212e", setup: func(i *instruction) { - i.asVecMisc(vecOpSqxtun, operandNR(v1VReg), operandNR(v2VReg), vecArrangement8B) + i.asVecMisc(vecOpSqxtun, v1VReg, operandNR(v2VReg), vecArrangement8B) }}, {want: "4128216e", setup: func(i *instruction) { // uqxtun2 - i.asVecMisc(vecOpSqxtun, operandNR(v1VReg), operandNR(v2VReg), vecArrangement16B) + i.asVecMisc(vecOpSqxtun, v1VReg, operandNR(v2VReg), vecArrangement16B) }}, {want: "4128612e", setup: func(i *instruction) { - i.asVecMisc(vecOpSqxtun, operandNR(v1VReg), operandNR(v2VReg), vecArrangement4H) + i.asVecMisc(vecOpSqxtun, v1VReg, operandNR(v2VReg), vecArrangement4H) }}, {want: "4128616e", setup: func(i *instruction) { // sqxtun2 - i.asVecMisc(vecOpSqxtun, operandNR(v1VReg), operandNR(v2VReg), vecArrangement8H) + i.asVecMisc(vecOpSqxtun, v1VReg, operandNR(v2VReg), vecArrangement8H) }}, {want: "4128a12e", setup: func(i *instruction) { - i.asVecMisc(vecOpSqxtun, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2S) + i.asVecMisc(vecOpSqxtun, v1VReg, operandNR(v2VReg), vecArrangement2S) }}, {want: "4128a16e", setup: func(i *instruction) { // sqxtun2 - i.asVecMisc(vecOpSqxtun, operandNR(v1VReg), operandNR(v2VReg), vecArrangement4S) + i.asVecMisc(vecOpSqxtun, v1VReg, operandNR(v2VReg), vecArrangement4S) }}, {want: "4148212e", setup: func(i *instruction) { - i.asVecMisc(vecOpUqxtn, operandNR(v1VReg), operandNR(v2VReg), vecArrangement8B) + i.asVecMisc(vecOpUqxtn, v1VReg, operandNR(v2VReg), vecArrangement8B) }}, {want: "4148216e", setup: func(i *instruction) { // uqxtn2 - i.asVecMisc(vecOpUqxtn, operandNR(v1VReg), operandNR(v2VReg), vecArrangement16B) + i.asVecMisc(vecOpUqxtn, v1VReg, operandNR(v2VReg), vecArrangement16B) }}, {want: "4148612e", setup: func(i *instruction) { - i.asVecMisc(vecOpUqxtn, operandNR(v1VReg), operandNR(v2VReg), vecArrangement4H) + i.asVecMisc(vecOpUqxtn, v1VReg, operandNR(v2VReg), vecArrangement4H) }}, {want: "4148616e", setup: func(i *instruction) { // sqxtn2 - i.asVecMisc(vecOpUqxtn, operandNR(v1VReg), operandNR(v2VReg), vecArrangement8H) + i.asVecMisc(vecOpUqxtn, v1VReg, operandNR(v2VReg), vecArrangement8H) }}, {want: "4148a12e", setup: func(i *instruction) { - i.asVecMisc(vecOpUqxtn, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2S) + i.asVecMisc(vecOpUqxtn, v1VReg, operandNR(v2VReg), vecArrangement2S) }}, 
{want: "4148a16e", setup: func(i *instruction) { // sqxtn2 - i.asVecMisc(vecOpUqxtn, operandNR(v1VReg), operandNR(v2VReg), vecArrangement4S) + i.asVecMisc(vecOpUqxtn, v1VReg, operandNR(v2VReg), vecArrangement4S) }}, {want: "41b8200e", setup: func(i *instruction) { - i.asVecMisc(vecOpAbs, operandNR(v1VReg), operandNR(v2VReg), vecArrangement8B) + i.asVecMisc(vecOpAbs, v1VReg, operandNR(v2VReg), vecArrangement8B) }}, {want: "41b8204e", setup: func(i *instruction) { - i.asVecMisc(vecOpAbs, operandNR(v1VReg), operandNR(v2VReg), vecArrangement16B) + i.asVecMisc(vecOpAbs, v1VReg, operandNR(v2VReg), vecArrangement16B) }}, {want: "41b8600e", setup: func(i *instruction) { - i.asVecMisc(vecOpAbs, operandNR(v1VReg), operandNR(v2VReg), vecArrangement4H) + i.asVecMisc(vecOpAbs, v1VReg, operandNR(v2VReg), vecArrangement4H) }}, {want: "41b8604e", setup: func(i *instruction) { - i.asVecMisc(vecOpAbs, operandNR(v1VReg), operandNR(v2VReg), vecArrangement8H) + i.asVecMisc(vecOpAbs, v1VReg, operandNR(v2VReg), vecArrangement8H) }}, {want: "41b8a00e", setup: func(i *instruction) { - i.asVecMisc(vecOpAbs, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2S) + i.asVecMisc(vecOpAbs, v1VReg, operandNR(v2VReg), vecArrangement2S) }}, {want: "41b8a04e", setup: func(i *instruction) { - i.asVecMisc(vecOpAbs, operandNR(v1VReg), operandNR(v2VReg), vecArrangement4S) + i.asVecMisc(vecOpAbs, v1VReg, operandNR(v2VReg), vecArrangement4S) }}, {want: "41b8e04e", setup: func(i *instruction) { - i.asVecMisc(vecOpAbs, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2D) + i.asVecMisc(vecOpAbs, v1VReg, operandNR(v2VReg), vecArrangement2D) }}, {want: "41f8a00e", setup: func(i *instruction) { - i.asVecMisc(vecOpFabs, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2S) + i.asVecMisc(vecOpFabs, v1VReg, operandNR(v2VReg), vecArrangement2S) }}, {want: "41f8a04e", setup: func(i *instruction) { - i.asVecMisc(vecOpFabs, operandNR(v1VReg), operandNR(v2VReg), vecArrangement4S) + i.asVecMisc(vecOpFabs, v1VReg, operandNR(v2VReg), vecArrangement4S) }}, {want: "41f8e04e", setup: func(i *instruction) { - i.asVecMisc(vecOpFabs, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2D) + i.asVecMisc(vecOpFabs, v1VReg, operandNR(v2VReg), vecArrangement2D) }}, {want: "41b8202e", setup: func(i *instruction) { - i.asVecMisc(vecOpNeg, operandNR(v1VReg), operandNR(v2VReg), vecArrangement8B) + i.asVecMisc(vecOpNeg, v1VReg, operandNR(v2VReg), vecArrangement8B) }}, {want: "41b8206e", setup: func(i *instruction) { - i.asVecMisc(vecOpNeg, operandNR(v1VReg), operandNR(v2VReg), vecArrangement16B) + i.asVecMisc(vecOpNeg, v1VReg, operandNR(v2VReg), vecArrangement16B) }}, {want: "41b8602e", setup: func(i *instruction) { - i.asVecMisc(vecOpNeg, operandNR(v1VReg), operandNR(v2VReg), vecArrangement4H) + i.asVecMisc(vecOpNeg, v1VReg, operandNR(v2VReg), vecArrangement4H) }}, {want: "41b8606e", setup: func(i *instruction) { - i.asVecMisc(vecOpNeg, operandNR(v1VReg), operandNR(v2VReg), vecArrangement8H) + i.asVecMisc(vecOpNeg, v1VReg, operandNR(v2VReg), vecArrangement8H) }}, {want: "41b8a02e", setup: func(i *instruction) { - i.asVecMisc(vecOpNeg, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2S) + i.asVecMisc(vecOpNeg, v1VReg, operandNR(v2VReg), vecArrangement2S) }}, {want: "41b8a06e", setup: func(i *instruction) { - i.asVecMisc(vecOpNeg, operandNR(v1VReg), operandNR(v2VReg), vecArrangement4S) + i.asVecMisc(vecOpNeg, v1VReg, operandNR(v2VReg), vecArrangement4S) }}, {want: "41b8e06e", setup: func(i *instruction) { - i.asVecMisc(vecOpNeg, operandNR(v1VReg), 
operandNR(v2VReg), vecArrangement2D) + i.asVecMisc(vecOpNeg, v1VReg, operandNR(v2VReg), vecArrangement2D) }}, {want: "4128a10e", setup: func(i *instruction) { - i.asVecMisc(vecOpXtn, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2S) + i.asVecMisc(vecOpXtn, v1VReg, operandNR(v2VReg), vecArrangement2S) }}, {want: "4128a10e", setup: func(i *instruction) { - i.asVecMisc(vecOpXtn, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2S) + i.asVecMisc(vecOpXtn, v1VReg, operandNR(v2VReg), vecArrangement2S) }}, {want: "41f8a02e", setup: func(i *instruction) { - i.asVecMisc(vecOpFneg, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2S) + i.asVecMisc(vecOpFneg, v1VReg, operandNR(v2VReg), vecArrangement2S) }}, {want: "41f8a06e", setup: func(i *instruction) { - i.asVecMisc(vecOpFneg, operandNR(v1VReg), operandNR(v2VReg), vecArrangement4S) + i.asVecMisc(vecOpFneg, v1VReg, operandNR(v2VReg), vecArrangement4S) }}, {want: "41f8e06e", setup: func(i *instruction) { - i.asVecMisc(vecOpFneg, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2D) + i.asVecMisc(vecOpFneg, v1VReg, operandNR(v2VReg), vecArrangement2D) }}, {want: "41f8a12e", setup: func(i *instruction) { - i.asVecMisc(vecOpFsqrt, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2S) + i.asVecMisc(vecOpFsqrt, v1VReg, operandNR(v2VReg), vecArrangement2S) }}, {want: "41f8a16e", setup: func(i *instruction) { - i.asVecMisc(vecOpFsqrt, operandNR(v1VReg), operandNR(v2VReg), vecArrangement4S) + i.asVecMisc(vecOpFsqrt, v1VReg, operandNR(v2VReg), vecArrangement4S) }}, {want: "41f8e16e", setup: func(i *instruction) { - i.asVecMisc(vecOpFsqrt, operandNR(v1VReg), operandNR(v2VReg), vecArrangement2D) - }}, - {want: "4100839a", setup: func(i *instruction) { i.asCSel(operandNR(x1VReg), operandNR(x2VReg), operandNR(x3VReg), eq, true) }}, - {want: "4110839a", setup: func(i *instruction) { i.asCSel(operandNR(x1VReg), operandNR(x2VReg), operandNR(x3VReg), ne, true) }}, - {want: "4100831a", setup: func(i *instruction) { i.asCSel(operandNR(x1VReg), operandNR(x2VReg), operandNR(x3VReg), eq, false) }}, - {want: "4110831a", setup: func(i *instruction) { i.asCSel(operandNR(x1VReg), operandNR(x2VReg), operandNR(x3VReg), ne, false) }}, - {want: "41cc631e", setup: func(i *instruction) { i.asFpuCSel(operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), gt, true) }}, - {want: "41bc631e", setup: func(i *instruction) { i.asFpuCSel(operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), lt, true) }}, - {want: "41cc231e", setup: func(i *instruction) { i.asFpuCSel(operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), gt, false) }}, - {want: "41bc231e", setup: func(i *instruction) { i.asFpuCSel(operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), lt, false) }}, - {want: "411c014e", setup: func(i *instruction) { i.asMovToVec(operandNR(v1VReg), operandNR(x2VReg), vecArrangementB, 0) }}, - {want: "411c024e", setup: func(i *instruction) { i.asMovToVec(operandNR(v1VReg), operandNR(x2VReg), vecArrangementH, 0) }}, - {want: "411c044e", setup: func(i *instruction) { i.asMovToVec(operandNR(v1VReg), operandNR(x2VReg), vecArrangementS, 0) }}, - {want: "411c084e", setup: func(i *instruction) { i.asMovToVec(operandNR(v1VReg), operandNR(x2VReg), vecArrangementD, 0) }}, - {want: "413c010e", setup: func(i *instruction) { i.asMovFromVec(operandNR(x1VReg), operandNR(v2VReg), vecArrangementB, 0, false) }}, - {want: "413c020e", setup: func(i *instruction) { i.asMovFromVec(operandNR(x1VReg), operandNR(v2VReg), vecArrangementH, 0, false) }}, - {want: "413c040e", setup: func(i 
*instruction) { i.asMovFromVec(operandNR(x1VReg), operandNR(v2VReg), vecArrangementS, 0, false) }}, - {want: "413c084e", setup: func(i *instruction) { i.asMovFromVec(operandNR(x1VReg), operandNR(v2VReg), vecArrangementD, 0, false) }}, - {want: "412c030e", setup: func(i *instruction) { i.asMovFromVec(operandNR(x1VReg), operandNR(v2VReg), vecArrangementB, 1, true) }}, - {want: "412c060e", setup: func(i *instruction) { i.asMovFromVec(operandNR(x1VReg), operandNR(v2VReg), vecArrangementH, 1, true) }}, - {want: "412c0c4e", setup: func(i *instruction) { i.asMovFromVec(operandNR(x1VReg), operandNR(v2VReg), vecArrangementS, 1, true) }}, - {want: "410c084e", setup: func(i *instruction) { i.asVecDup(operandNR(x1VReg), operandNR(v2VReg), vecArrangement2D) }}, + i.asVecMisc(vecOpFsqrt, v1VReg, operandNR(v2VReg), vecArrangement2D) + }}, + {want: "4100839a", setup: func(i *instruction) { i.asCSel(x1VReg, operandNR(x2VReg), operandNR(x3VReg), eq, true) }}, + {want: "4110839a", setup: func(i *instruction) { i.asCSel(x1VReg, operandNR(x2VReg), operandNR(x3VReg), ne, true) }}, + {want: "4100831a", setup: func(i *instruction) { i.asCSel(x1VReg, operandNR(x2VReg), operandNR(x3VReg), eq, false) }}, + {want: "4110831a", setup: func(i *instruction) { i.asCSel(x1VReg, operandNR(x2VReg), operandNR(x3VReg), ne, false) }}, + {want: "41cc631e", setup: func(i *instruction) { i.asFpuCSel(v1VReg, operandNR(v2VReg), operandNR(v3VReg), gt, true) }}, + {want: "41bc631e", setup: func(i *instruction) { i.asFpuCSel(v1VReg, operandNR(v2VReg), operandNR(v3VReg), lt, true) }}, + {want: "41cc231e", setup: func(i *instruction) { i.asFpuCSel(v1VReg, operandNR(v2VReg), operandNR(v3VReg), gt, false) }}, + {want: "41bc231e", setup: func(i *instruction) { i.asFpuCSel(v1VReg, operandNR(v2VReg), operandNR(v3VReg), lt, false) }}, + {want: "411c014e", setup: func(i *instruction) { i.asMovToVec(v1VReg, operandNR(x2VReg), vecArrangementB, 0) }}, + {want: "411c024e", setup: func(i *instruction) { i.asMovToVec(v1VReg, operandNR(x2VReg), vecArrangementH, 0) }}, + {want: "411c044e", setup: func(i *instruction) { i.asMovToVec(v1VReg, operandNR(x2VReg), vecArrangementS, 0) }}, + {want: "411c084e", setup: func(i *instruction) { i.asMovToVec(v1VReg, operandNR(x2VReg), vecArrangementD, 0) }}, + {want: "413c010e", setup: func(i *instruction) { i.asMovFromVec(x1VReg, operandNR(v2VReg), vecArrangementB, 0, false) }}, + {want: "413c020e", setup: func(i *instruction) { i.asMovFromVec(x1VReg, operandNR(v2VReg), vecArrangementH, 0, false) }}, + {want: "413c040e", setup: func(i *instruction) { i.asMovFromVec(x1VReg, operandNR(v2VReg), vecArrangementS, 0, false) }}, + {want: "413c084e", setup: func(i *instruction) { i.asMovFromVec(x1VReg, operandNR(v2VReg), vecArrangementD, 0, false) }}, + {want: "412c030e", setup: func(i *instruction) { i.asMovFromVec(x1VReg, operandNR(v2VReg), vecArrangementB, 1, true) }}, + {want: "412c060e", setup: func(i *instruction) { i.asMovFromVec(x1VReg, operandNR(v2VReg), vecArrangementH, 1, true) }}, + {want: "412c0c4e", setup: func(i *instruction) { i.asMovFromVec(x1VReg, operandNR(v2VReg), vecArrangementS, 1, true) }}, + {want: "410c084e", setup: func(i *instruction) { i.asVecDup(x1VReg, operandNR(v2VReg), vecArrangement2D) }}, {want: "4140036e", setup: func(i *instruction) { // 4140036e - i.asVecExtract(operandNR(x1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B, 8) + i.asVecExtract(x1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B, 8) }}, {want: "4138034e", setup: func(i *instruction) { - 
i.asVecPermute(vecOpZip1, operandNR(x1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) + i.asVecPermute(vecOpZip1, x1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) }}, {want: "4104214f", setup: func(i *instruction) { - i.asVecShiftImm(vecOpSshr, operandNR(x1VReg), operandNR(x2VReg), operandShiftImm(31), vecArrangement4S) + i.asVecShiftImm(vecOpSshr, x1VReg, operandNR(x2VReg), operandShiftImm(31), vecArrangement4S) }}, {want: "5b28030b", setup: func(i *instruction) { - i.asALU(aluOpAdd, operandNR(tmpRegVReg), operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpLSL), false) + i.asALU(aluOpAdd, tmpRegVReg, operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpLSL), false) }}, {want: "5b28038b", setup: func(i *instruction) { - i.asALU(aluOpAdd, operandNR(tmpRegVReg), operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpLSL), true) + i.asALU(aluOpAdd, tmpRegVReg, operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpLSL), true) }}, {want: "5b28032b", setup: func(i *instruction) { - i.asALU(aluOpAddS, operandNR(tmpRegVReg), operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpLSL), false) + i.asALU(aluOpAddS, tmpRegVReg, operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpLSL), false) }}, {want: "5b2803ab", setup: func(i *instruction) { - i.asALU(aluOpAddS, operandNR(tmpRegVReg), operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpLSL), true) + i.asALU(aluOpAddS, tmpRegVReg, operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpLSL), true) }}, {want: "5b28430b", setup: func(i *instruction) { - i.asALU(aluOpAdd, operandNR(tmpRegVReg), operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpLSR), false) + i.asALU(aluOpAdd, tmpRegVReg, operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpLSR), false) }}, {want: "5b28438b", setup: func(i *instruction) { - i.asALU(aluOpAdd, operandNR(tmpRegVReg), operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpLSR), true) + i.asALU(aluOpAdd, tmpRegVReg, operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpLSR), true) }}, {want: "5b28432b", setup: func(i *instruction) { - i.asALU(aluOpAddS, operandNR(tmpRegVReg), operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpLSR), false) + i.asALU(aluOpAddS, tmpRegVReg, operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpLSR), false) }}, {want: "5b2843ab", setup: func(i *instruction) { - i.asALU(aluOpAddS, operandNR(tmpRegVReg), operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpLSR), true) + i.asALU(aluOpAddS, tmpRegVReg, operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpLSR), true) }}, {want: "5b28830b", setup: func(i *instruction) { - i.asALU(aluOpAdd, operandNR(tmpRegVReg), operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpASR), false) + i.asALU(aluOpAdd, tmpRegVReg, operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpASR), false) }}, {want: "5b28838b", setup: func(i *instruction) { - i.asALU(aluOpAdd, operandNR(tmpRegVReg), operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpASR), true) + i.asALU(aluOpAdd, tmpRegVReg, operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpASR), true) }}, {want: "5b28832b", setup: func(i *instruction) { - i.asALU(aluOpAddS, operandNR(tmpRegVReg), operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpASR), false) + i.asALU(aluOpAddS, tmpRegVReg, operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpASR), false) }}, {want: "5b2883ab", setup: func(i *instruction) { - i.asALU(aluOpAddS, operandNR(tmpRegVReg), operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpASR), true) + i.asALU(aluOpAddS, tmpRegVReg, operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpASR), true) }}, {want: "5b28034b", setup: func(i *instruction) { - i.asALU(aluOpSub, operandNR(tmpRegVReg), 
operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpLSL), false) + i.asALU(aluOpSub, tmpRegVReg, operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpLSL), false) }}, {want: "5b2803cb", setup: func(i *instruction) { - i.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpLSL), true) + i.asALU(aluOpSub, tmpRegVReg, operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpLSL), true) }}, {want: "5b28036b", setup: func(i *instruction) { - i.asALU(aluOpSubS, operandNR(tmpRegVReg), operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpLSL), false) + i.asALU(aluOpSubS, tmpRegVReg, operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpLSL), false) }}, {want: "5b2803eb", setup: func(i *instruction) { - i.asALU(aluOpSubS, operandNR(tmpRegVReg), operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpLSL), true) + i.asALU(aluOpSubS, tmpRegVReg, operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpLSL), true) }}, {want: "5b28434b", setup: func(i *instruction) { - i.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpLSR), false) + i.asALU(aluOpSub, tmpRegVReg, operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpLSR), false) }}, {want: "5b2843cb", setup: func(i *instruction) { - i.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpLSR), true) + i.asALU(aluOpSub, tmpRegVReg, operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpLSR), true) }}, {want: "5b28436b", setup: func(i *instruction) { - i.asALU(aluOpSubS, operandNR(tmpRegVReg), operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpLSR), false) + i.asALU(aluOpSubS, tmpRegVReg, operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpLSR), false) }}, {want: "5b2843eb", setup: func(i *instruction) { - i.asALU(aluOpSubS, operandNR(tmpRegVReg), operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpLSR), true) + i.asALU(aluOpSubS, tmpRegVReg, operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpLSR), true) }}, {want: "5b28834b", setup: func(i *instruction) { - i.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpASR), false) + i.asALU(aluOpSub, tmpRegVReg, operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpASR), false) }}, {want: "5b2883cb", setup: func(i *instruction) { - i.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpASR), true) + i.asALU(aluOpSub, tmpRegVReg, operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpASR), true) }}, {want: "5b28836b", setup: func(i *instruction) { - i.asALU(aluOpSubS, operandNR(tmpRegVReg), operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpASR), false) + i.asALU(aluOpSubS, tmpRegVReg, operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpASR), false) }}, {want: "5b2883eb", setup: func(i *instruction) { - i.asALU(aluOpSubS, operandNR(tmpRegVReg), operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpASR), true) + i.asALU(aluOpSubS, tmpRegVReg, operandNR(x2VReg), operandSR(x3VReg, 10, shiftOpASR), true) }}, {want: "60033fd6", setup: func(i *instruction) { i.asCallIndirect(tmpRegVReg, nil) }}, {want: "fb633bcb", setup: func(i *instruction) { - i.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(spVReg), operandNR(tmpRegVReg), true) + i.asALU(aluOpSub, tmpRegVReg, operandNR(spVReg), operandNR(tmpRegVReg), true) }}, {want: "fb633b8b", setup: func(i *instruction) { - i.asALU(aluOpAdd, operandNR(tmpRegVReg), operandNR(spVReg), operandNR(tmpRegVReg), true) + i.asALU(aluOpAdd, tmpRegVReg, operandNR(spVReg), operandNR(tmpRegVReg), true) }}, {want: "2000020a", setup: func(i *instruction) { - i.asALU(aluOpAnd, operandNR(x0VReg), operandNR(x1VReg), 
operandNR(x2VReg), false) + i.asALU(aluOpAnd, x0VReg, operandNR(x1VReg), operandNR(x2VReg), false) }}, {want: "2000028a", setup: func(i *instruction) { - i.asALU(aluOpAnd, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), true) + i.asALU(aluOpAnd, x0VReg, operandNR(x1VReg), operandNR(x2VReg), true) }}, {want: "2010028a", setup: func(i *instruction) { - i.asALU(aluOpAnd, operandNR(x0VReg), operandNR(x1VReg), operandSR(x2VReg, 4, shiftOpLSL), true) + i.asALU(aluOpAnd, x0VReg, operandNR(x1VReg), operandSR(x2VReg, 4, shiftOpLSL), true) }}, {want: "2030428a", setup: func(i *instruction) { - i.asALU(aluOpAnd, operandNR(x0VReg), operandNR(x1VReg), operandSR(x2VReg, 12, shiftOpLSR), true) + i.asALU(aluOpAnd, x0VReg, operandNR(x1VReg), operandSR(x2VReg, 12, shiftOpLSR), true) }}, {want: "2000026a", setup: func(i *instruction) { - i.asALU(aluOpAnds, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), false) + i.asALU(aluOpAnds, x0VReg, operandNR(x1VReg), operandNR(x2VReg), false) }}, {want: "200002ea", setup: func(i *instruction) { - i.asALU(aluOpAnds, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), true) + i.asALU(aluOpAnds, x0VReg, operandNR(x1VReg), operandNR(x2VReg), true) }}, {want: "201002ea", setup: func(i *instruction) { - i.asALU(aluOpAnds, operandNR(x0VReg), operandNR(x1VReg), operandSR(x2VReg, 4, shiftOpLSL), true) + i.asALU(aluOpAnds, x0VReg, operandNR(x1VReg), operandSR(x2VReg, 4, shiftOpLSL), true) }}, {want: "203042ea", setup: func(i *instruction) { - i.asALU(aluOpAnds, operandNR(x0VReg), operandNR(x1VReg), operandSR(x2VReg, 12, shiftOpLSR), true) + i.asALU(aluOpAnds, x0VReg, operandNR(x1VReg), operandSR(x2VReg, 12, shiftOpLSR), true) }}, {want: "2000022a", setup: func(i *instruction) { - i.asALU(aluOpOrr, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), false) + i.asALU(aluOpOrr, x0VReg, operandNR(x1VReg), operandNR(x2VReg), false) }}, {want: "200002aa", setup: func(i *instruction) { - i.asALU(aluOpOrr, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), true) + i.asALU(aluOpOrr, x0VReg, operandNR(x1VReg), operandNR(x2VReg), true) }}, {want: "201002aa", setup: func(i *instruction) { - i.asALU(aluOpOrr, operandNR(x0VReg), operandNR(x1VReg), operandSR(x2VReg, 4, shiftOpLSL), true) + i.asALU(aluOpOrr, x0VReg, operandNR(x1VReg), operandSR(x2VReg, 4, shiftOpLSL), true) }}, {want: "201082aa", setup: func(i *instruction) { - i.asALU(aluOpOrr, operandNR(x0VReg), operandNR(x1VReg), operandSR(x2VReg, 4, shiftOpASR), true) + i.asALU(aluOpOrr, x0VReg, operandNR(x1VReg), operandSR(x2VReg, 4, shiftOpASR), true) }}, {want: "2000024a", setup: func(i *instruction) { - i.asALU(aluOpEor, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), false) + i.asALU(aluOpEor, x0VReg, operandNR(x1VReg), operandNR(x2VReg), false) }}, {want: "200002ca", setup: func(i *instruction) { - i.asALU(aluOpEor, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), true) + i.asALU(aluOpEor, x0VReg, operandNR(x1VReg), operandNR(x2VReg), true) }}, {want: "201002ca", setup: func(i *instruction) { - i.asALU(aluOpEor, operandNR(x0VReg), operandNR(x1VReg), operandSR(x2VReg, 4, shiftOpLSL), true) + i.asALU(aluOpEor, x0VReg, operandNR(x1VReg), operandSR(x2VReg, 4, shiftOpLSL), true) }}, {want: "202cc21a", setup: func(i *instruction) { - i.asALU(aluOpRotR, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), false) + i.asALU(aluOpRotR, x0VReg, operandNR(x1VReg), operandNR(x2VReg), false) }}, {want: "202cc29a", setup: func(i *instruction) { - i.asALU(aluOpRotR, operandNR(x0VReg), 
operandNR(x1VReg), operandNR(x2VReg), true) + i.asALU(aluOpRotR, x0VReg, operandNR(x1VReg), operandNR(x2VReg), true) }}, {want: "2000222a", setup: func(i *instruction) { - i.asALU(aluOpOrn, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), false) + i.asALU(aluOpOrn, x0VReg, operandNR(x1VReg), operandNR(x2VReg), false) }}, {want: "200022aa", setup: func(i *instruction) { - i.asALU(aluOpOrn, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), true) + i.asALU(aluOpOrn, x0VReg, operandNR(x1VReg), operandNR(x2VReg), true) }}, {want: "30000010", setup: func(i *instruction) { i.asAdr(v16VReg, 4) }}, {want: "50050030", setup: func(i *instruction) { i.asAdr(v16VReg, 169) }}, @@ -1093,16 +1094,16 @@ func TestInstruction_encode(t *testing.T) { {want: "101e306e", setup: func(i *instruction) { i.asLoadFpuConst128(v16VReg, 0, 0) }}, {want: "5000009c05000014ffffffffffffffffaaaaaaaaaaaaaaaa", setup: func(i *instruction) { i.asLoadFpuConst128(v16VReg, 0xffffffff_ffffffff, 0xaaaaaaaa_aaaaaaaa) }}, {want: "8220061b", setup: func(i *instruction) { - i.asALURRRR(aluOpMAdd, operandNR(x2VReg), operandNR(x4VReg), operandNR(x6VReg), operandNR(x8VReg), false) + i.asALURRRR(aluOpMAdd, x2VReg, operandNR(x4VReg), operandNR(x6VReg), x8VReg, false) }}, {want: "8220069b", setup: func(i *instruction) { - i.asALURRRR(aluOpMAdd, operandNR(x2VReg), operandNR(x4VReg), operandNR(x6VReg), operandNR(x8VReg), true) + i.asALURRRR(aluOpMAdd, x2VReg, operandNR(x4VReg), operandNR(x6VReg), x8VReg, true) }}, {want: "82a0061b", setup: func(i *instruction) { - i.asALURRRR(aluOpMSub, operandNR(x2VReg), operandNR(x4VReg), operandNR(x6VReg), operandNR(x8VReg), false) + i.asALURRRR(aluOpMSub, x2VReg, operandNR(x4VReg), operandNR(x6VReg), x8VReg, false) }}, {want: "82a0069b", setup: func(i *instruction) { - i.asALURRRR(aluOpMSub, operandNR(x2VReg), operandNR(x4VReg), operandNR(x6VReg), operandNR(x8VReg), true) + i.asALURRRR(aluOpMSub, x2VReg, operandNR(x4VReg), operandNR(x6VReg), x8VReg, true) }}, {want: "00213f1e", setup: func(i *instruction) { i.asFpuCmp(operandNR(v8VReg), operandNR(v31VReg), false) }}, {want: "00217f1e", setup: func(i *instruction) { i.asFpuCmp(operandNR(v8VReg), operandNR(v31VReg), true) }}, @@ -1200,76 +1201,76 @@ func TestInstruction_encode(t *testing.T) { {want: "f21fbf0e", setup: func(i *instruction) { i.asFpuMov64(v18VReg, v31VReg) }}, {want: "f21fbf4e", setup: func(i *instruction) { i.asFpuMov128(v18VReg, v31VReg) }}, {want: "40a034ab", setup: func(i *instruction) { - i.asALU(aluOpAddS, operandNR(x0VReg), operandNR(x2VReg), operandER(x20VReg, extendOpSXTH, 64), false) + i.asALU(aluOpAddS, x0VReg, operandNR(x2VReg), operandER(x20VReg, extendOpSXTH, 64), false) }}, {want: "4080348b", setup: func(i *instruction) { - i.asALU(aluOpAdd, operandNR(x0VReg), operandNR(x2VReg), operandER(x20VReg, extendOpSXTB, 64), false) + i.asALU(aluOpAdd, x0VReg, operandNR(x2VReg), operandER(x20VReg, extendOpSXTB, 64), false) }}, {want: "40a0348b", setup: func(i *instruction) { - i.asALU(aluOpAdd, operandNR(x0VReg), operandNR(x2VReg), operandER(x20VReg, extendOpSXTH, 64), false) + i.asALU(aluOpAdd, x0VReg, operandNR(x2VReg), operandER(x20VReg, extendOpSXTH, 64), false) }}, {want: "40c0348b", setup: func(i *instruction) { - i.asALU(aluOpAdd, operandNR(x0VReg), operandNR(x2VReg), operandER(x20VReg, extendOpSXTW, 64), false) + i.asALU(aluOpAdd, x0VReg, operandNR(x2VReg), operandER(x20VReg, extendOpSXTW, 64), false) }}, {want: "4080340b", setup: func(i *instruction) { - i.asALU(aluOpAdd, operandNR(x0VReg), operandNR(x2VReg), 
operandER(x20VReg, extendOpSXTB, 32), false) + i.asALU(aluOpAdd, x0VReg, operandNR(x2VReg), operandER(x20VReg, extendOpSXTB, 32), false) }}, {want: "40a0340b", setup: func(i *instruction) { - i.asALU(aluOpAdd, operandNR(x0VReg), operandNR(x2VReg), operandER(x20VReg, extendOpSXTH, 32), false) + i.asALU(aluOpAdd, x0VReg, operandNR(x2VReg), operandER(x20VReg, extendOpSXTH, 32), false) }}, {want: "40c0340b", setup: func(i *instruction) { - i.asALU(aluOpAdd, operandNR(x0VReg), operandNR(x2VReg), operandER(x20VReg, extendOpSXTW, 32), false) + i.asALU(aluOpAdd, x0VReg, operandNR(x2VReg), operandER(x20VReg, extendOpSXTW, 32), false) }}, {want: "400034eb", setup: func(i *instruction) { - i.asALU(aluOpSubS, operandNR(x0VReg), operandNR(x2VReg), operandER(x20VReg, extendOpUXTB, 64), false) + i.asALU(aluOpSubS, x0VReg, operandNR(x2VReg), operandER(x20VReg, extendOpUXTB, 64), false) }}, {want: "400034cb", setup: func(i *instruction) { - i.asALU(aluOpSub, operandNR(x0VReg), operandNR(x2VReg), operandER(x20VReg, extendOpUXTB, 64), false) + i.asALU(aluOpSub, x0VReg, operandNR(x2VReg), operandER(x20VReg, extendOpUXTB, 64), false) }}, {want: "402034cb", setup: func(i *instruction) { - i.asALU(aluOpSub, operandNR(x0VReg), operandNR(x2VReg), operandER(x20VReg, extendOpUXTH, 64), false) + i.asALU(aluOpSub, x0VReg, operandNR(x2VReg), operandER(x20VReg, extendOpUXTH, 64), false) }}, {want: "404034cb", setup: func(i *instruction) { - i.asALU(aluOpSub, operandNR(x0VReg), operandNR(x2VReg), operandER(x20VReg, extendOpUXTW, 64), false) + i.asALU(aluOpSub, x0VReg, operandNR(x2VReg), operandER(x20VReg, extendOpUXTW, 64), false) }}, {want: "4000344b", setup: func(i *instruction) { - i.asALU(aluOpSub, operandNR(x0VReg), operandNR(x2VReg), operandER(x20VReg, extendOpUXTB, 32), false) + i.asALU(aluOpSub, x0VReg, operandNR(x2VReg), operandER(x20VReg, extendOpUXTB, 32), false) }}, {want: "4020344b", setup: func(i *instruction) { - i.asALU(aluOpSub, operandNR(x0VReg), operandNR(x2VReg), operandER(x20VReg, extendOpUXTH, 32), false) + i.asALU(aluOpSub, x0VReg, operandNR(x2VReg), operandER(x20VReg, extendOpUXTH, 32), false) }}, {want: "4040344b", setup: func(i *instruction) { - i.asALU(aluOpSub, operandNR(x0VReg), operandNR(x2VReg), operandER(x20VReg, extendOpUXTW, 32), false) + i.asALU(aluOpSub, x0VReg, operandNR(x2VReg), operandER(x20VReg, extendOpUXTW, 32), false) }}, {want: "4000140b", setup: func(i *instruction) { - i.asALU(aluOpAdd, operandNR(x0VReg), operandNR(x2VReg), operandNR(x20VReg), false) + i.asALU(aluOpAdd, x0VReg, operandNR(x2VReg), operandNR(x20VReg), false) }}, {want: "4000148b", setup: func(i *instruction) { - i.asALU(aluOpAdd, operandNR(x0VReg), operandNR(x2VReg), operandNR(x20VReg), true) + i.asALU(aluOpAdd, x0VReg, operandNR(x2VReg), operandNR(x20VReg), true) }}, {want: "40001f8b", setup: func(i *instruction) { - i.asALU(aluOpAdd, operandNR(x0VReg), operandNR(x2VReg), operandNR(xzrVReg), true) + i.asALU(aluOpAdd, x0VReg, operandNR(x2VReg), operandNR(xzrVReg), true) }}, {want: "4000142b", setup: func(i *instruction) { - i.asALU(aluOpAddS, operandNR(x0VReg), operandNR(x2VReg), operandNR(x20VReg), false) + i.asALU(aluOpAddS, x0VReg, operandNR(x2VReg), operandNR(x20VReg), false) }}, {want: "400014ab", setup: func(i *instruction) { - i.asALU(aluOpAddS, operandNR(x0VReg), operandNR(x2VReg), operandNR(x20VReg), true) + i.asALU(aluOpAddS, x0VReg, operandNR(x2VReg), operandNR(x20VReg), true) }}, {want: "4000144b", setup: func(i *instruction) { - i.asALU(aluOpSub, operandNR(x0VReg), operandNR(x2VReg), 
operandNR(x20VReg), false) + i.asALU(aluOpSub, x0VReg, operandNR(x2VReg), operandNR(x20VReg), false) }}, {want: "400014cb", setup: func(i *instruction) { - i.asALU(aluOpSub, operandNR(x0VReg), operandNR(x2VReg), operandNR(x20VReg), true) + i.asALU(aluOpSub, x0VReg, operandNR(x2VReg), operandNR(x20VReg), true) }}, {want: "40001fcb", setup: func(i *instruction) { - i.asALU(aluOpSub, operandNR(x0VReg), operandNR(x2VReg), operandNR(xzrVReg), true) + i.asALU(aluOpSub, x0VReg, operandNR(x2VReg), operandNR(xzrVReg), true) }}, {want: "400014eb", setup: func(i *instruction) { - i.asALU(aluOpSubS, operandNR(x0VReg), operandNR(x2VReg), operandNR(x20VReg), true) + i.asALU(aluOpSubS, x0VReg, operandNR(x2VReg), operandNR(x20VReg), true) }}, {want: "40001feb", setup: func(i *instruction) { - i.asALU(aluOpSubS, operandNR(x0VReg), operandNR(x2VReg), operandNR(xzrVReg), true) + i.asALU(aluOpSubS, x0VReg, operandNR(x2VReg), operandNR(xzrVReg), true) }}, {want: "c0035fd6", setup: func(i *instruction) { i.asRet() }}, {want: "e303042a", setup: func(i *instruction) { i.asMove32(x3VReg, x4VReg) }}, @@ -1279,22 +1280,22 @@ func TestInstruction_encode(t *testing.T) { {want: "9f000091", setup: func(i *instruction) { i.asMove64(spVReg, x4VReg) }}, {want: "e0030091", setup: func(i *instruction) { i.asMove64(x0VReg, spVReg) }}, {want: "e17bc1a8", setup: func(i *instruction) { - i.asLoadPair64(x1VReg, x30VReg, addressModePreOrPostIndex(spVReg, 16, false)) + i.asLoadPair64(x1VReg, x30VReg, addressModePreOrPostIndex(m, spVReg, 16, false)) }}, {want: "e17bc1a9", setup: func(i *instruction) { - i.asLoadPair64(x1VReg, x30VReg, addressModePreOrPostIndex(spVReg, 16, true)) + i.asLoadPair64(x1VReg, x30VReg, addressModePreOrPostIndex(m, spVReg, 16, true)) }}, {want: "e17b81a8", setup: func(i *instruction) { - i.asStorePair64(x1VReg, x30VReg, addressModePreOrPostIndex(spVReg, 16, false)) + i.asStorePair64(x1VReg, x30VReg, addressModePreOrPostIndex(m, spVReg, 16, false)) }}, {want: "e17b81a9", setup: func(i *instruction) { - i.asStorePair64(x1VReg, x30VReg, addressModePreOrPostIndex(spVReg, 16, true)) + i.asStorePair64(x1VReg, x30VReg, addressModePreOrPostIndex(m, spVReg, 16, true)) }}, {want: "e17f81a9", setup: func(i *instruction) { - i.asStorePair64(x1VReg, xzrVReg, addressModePreOrPostIndex(spVReg, 16, true)) + i.asStorePair64(x1VReg, xzrVReg, addressModePreOrPostIndex(m, spVReg, 16, true)) }}, {want: "ff7f81a9", setup: func(i *instruction) { - i.asStorePair64(xzrVReg, xzrVReg, addressModePreOrPostIndex(spVReg, 16, true)) + i.asStorePair64(xzrVReg, xzrVReg, addressModePreOrPostIndex(m, spVReg, 16, true)) }}, {want: "20000014", setup: func(i *instruction) { i.asBr(dummyLabel) @@ -1317,478 +1318,478 @@ func TestInstruction_encode(t *testing.T) { i.condBrOffsetResolve(0x80) }}, {want: "8328321e", setup: func(i *instruction) { - i.asFpuRRR(fpuBinOpAdd, operandNR(v3VReg), operandNR(v4VReg), operandNR(v18VReg), false) + i.asFpuRRR(fpuBinOpAdd, v3VReg, operandNR(v4VReg), operandNR(v18VReg), false) }}, {want: "8328721e", setup: func(i *instruction) { - i.asFpuRRR(fpuBinOpAdd, operandNR(v3VReg), operandNR(v4VReg), operandNR(v18VReg), true) + i.asFpuRRR(fpuBinOpAdd, v3VReg, operandNR(v4VReg), operandNR(v18VReg), true) }}, {want: "8338321e", setup: func(i *instruction) { - i.asFpuRRR(fpuBinOpSub, operandNR(v3VReg), operandNR(v4VReg), operandNR(v18VReg), false) + i.asFpuRRR(fpuBinOpSub, v3VReg, operandNR(v4VReg), operandNR(v18VReg), false) }}, {want: "8338721e", setup: func(i *instruction) { - i.asFpuRRR(fpuBinOpSub, operandNR(v3VReg), 
operandNR(v4VReg), operandNR(v18VReg), true) + i.asFpuRRR(fpuBinOpSub, v3VReg, operandNR(v4VReg), operandNR(v18VReg), true) }}, {want: "8308321e", setup: func(i *instruction) { - i.asFpuRRR(fpuBinOpMul, operandNR(v3VReg), operandNR(v4VReg), operandNR(v18VReg), false) + i.asFpuRRR(fpuBinOpMul, v3VReg, operandNR(v4VReg), operandNR(v18VReg), false) }}, {want: "8308721e", setup: func(i *instruction) { - i.asFpuRRR(fpuBinOpMul, operandNR(v3VReg), operandNR(v4VReg), operandNR(v18VReg), true) + i.asFpuRRR(fpuBinOpMul, v3VReg, operandNR(v4VReg), operandNR(v18VReg), true) }}, {want: "8318321e", setup: func(i *instruction) { - i.asFpuRRR(fpuBinOpDiv, operandNR(v3VReg), operandNR(v4VReg), operandNR(v18VReg), false) + i.asFpuRRR(fpuBinOpDiv, v3VReg, operandNR(v4VReg), operandNR(v18VReg), false) }}, {want: "8318721e", setup: func(i *instruction) { - i.asFpuRRR(fpuBinOpDiv, operandNR(v3VReg), operandNR(v4VReg), operandNR(v18VReg), true) + i.asFpuRRR(fpuBinOpDiv, v3VReg, operandNR(v4VReg), operandNR(v18VReg), true) }}, {want: "8348321e", setup: func(i *instruction) { - i.asFpuRRR(fpuBinOpMax, operandNR(v3VReg), operandNR(v4VReg), operandNR(v18VReg), false) + i.asFpuRRR(fpuBinOpMax, v3VReg, operandNR(v4VReg), operandNR(v18VReg), false) }}, {want: "8348721e", setup: func(i *instruction) { - i.asFpuRRR(fpuBinOpMax, operandNR(v3VReg), operandNR(v4VReg), operandNR(v18VReg), true) + i.asFpuRRR(fpuBinOpMax, v3VReg, operandNR(v4VReg), operandNR(v18VReg), true) }}, {want: "8358321e", setup: func(i *instruction) { - i.asFpuRRR(fpuBinOpMin, operandNR(v3VReg), operandNR(v4VReg), operandNR(v18VReg), false) + i.asFpuRRR(fpuBinOpMin, v3VReg, operandNR(v4VReg), operandNR(v18VReg), false) }}, {want: "8358721e", setup: func(i *instruction) { - i.asFpuRRR(fpuBinOpMin, operandNR(v3VReg), operandNR(v4VReg), operandNR(v18VReg), true) + i.asFpuRRR(fpuBinOpMin, v3VReg, operandNR(v4VReg), operandNR(v18VReg), true) }}, {want: "49fd7f11", setup: func(i *instruction) { - i.asALU(aluOpAdd, operandNR(x9VReg), operandNR(x10VReg), operandImm12(0b111111111111, 0b1), false) + i.asALU(aluOpAdd, x9VReg, operandNR(x10VReg), operandImm12(0b111111111111, 0b1), false) }}, {want: "e9ff7f91", setup: func(i *instruction) { - i.asALU(aluOpAdd, operandNR(x9VReg), operandNR(spVReg), operandImm12(0b111111111111, 0b1), true) + i.asALU(aluOpAdd, x9VReg, operandNR(spVReg), operandImm12(0b111111111111, 0b1), true) }}, {want: "49fd3f11", setup: func(i *instruction) { - i.asALU(aluOpAdd, operandNR(x9VReg), operandNR(x10VReg), operandImm12(0b111111111111, 0b0), false) + i.asALU(aluOpAdd, x9VReg, operandNR(x10VReg), operandImm12(0b111111111111, 0b0), false) }}, {want: "5ffd3f91", setup: func(i *instruction) { - i.asALU(aluOpAdd, operandNR(spVReg), operandNR(x10VReg), operandImm12(0b111111111111, 0b0), true) + i.asALU(aluOpAdd, spVReg, operandNR(x10VReg), operandImm12(0b111111111111, 0b0), true) }}, {want: "49fd7f31", setup: func(i *instruction) { - i.asALU(aluOpAddS, operandNR(x9VReg), operandNR(x10VReg), operandImm12(0b111111111111, 0b1), false) + i.asALU(aluOpAddS, x9VReg, operandNR(x10VReg), operandImm12(0b111111111111, 0b1), false) }}, {want: "49fd7fb1", setup: func(i *instruction) { - i.asALU(aluOpAddS, operandNR(x9VReg), operandNR(x10VReg), operandImm12(0b111111111111, 0b1), true) + i.asALU(aluOpAddS, x9VReg, operandNR(x10VReg), operandImm12(0b111111111111, 0b1), true) }}, {want: "49fd3f31", setup: func(i *instruction) { - i.asALU(aluOpAddS, operandNR(x9VReg), operandNR(x10VReg), operandImm12(0b111111111111, 0b0), false) + i.asALU(aluOpAddS, x9VReg, 
operandNR(x10VReg), operandImm12(0b111111111111, 0b0), false) }}, {want: "49fd3fb1", setup: func(i *instruction) { - i.asALU(aluOpAddS, operandNR(x9VReg), operandNR(x10VReg), operandImm12(0b111111111111, 0b0), true) + i.asALU(aluOpAddS, x9VReg, operandNR(x10VReg), operandImm12(0b111111111111, 0b0), true) }}, {want: "49fd7f51", setup: func(i *instruction) { - i.asALU(aluOpSub, operandNR(x9VReg), operandNR(x10VReg), operandImm12(0b111111111111, 0b1), false) + i.asALU(aluOpSub, x9VReg, operandNR(x10VReg), operandImm12(0b111111111111, 0b1), false) }}, {want: "e9ff7fd1", setup: func(i *instruction) { - i.asALU(aluOpSub, operandNR(x9VReg), operandNR(spVReg), operandImm12(0b111111111111, 0b1), true) + i.asALU(aluOpSub, x9VReg, operandNR(spVReg), operandImm12(0b111111111111, 0b1), true) }}, {want: "49fd3f51", setup: func(i *instruction) { - i.asALU(aluOpSub, operandNR(x9VReg), operandNR(x10VReg), operandImm12(0b111111111111, 0b0), false) + i.asALU(aluOpSub, x9VReg, operandNR(x10VReg), operandImm12(0b111111111111, 0b0), false) }}, {want: "5ffd3fd1", setup: func(i *instruction) { - i.asALU(aluOpSub, operandNR(spVReg), operandNR(x10VReg), operandImm12(0b111111111111, 0b0), true) + i.asALU(aluOpSub, spVReg, operandNR(x10VReg), operandImm12(0b111111111111, 0b0), true) }}, {want: "49fd7f71", setup: func(i *instruction) { - i.asALU(aluOpSubS, operandNR(x9VReg), operandNR(x10VReg), operandImm12(0b111111111111, 0b1), false) + i.asALU(aluOpSubS, x9VReg, operandNR(x10VReg), operandImm12(0b111111111111, 0b1), false) }}, {want: "49fd7ff1", setup: func(i *instruction) { - i.asALU(aluOpSubS, operandNR(x9VReg), operandNR(x10VReg), operandImm12(0b111111111111, 0b1), true) + i.asALU(aluOpSubS, x9VReg, operandNR(x10VReg), operandImm12(0b111111111111, 0b1), true) }}, {want: "49fd3f71", setup: func(i *instruction) { - i.asALU(aluOpSubS, operandNR(x9VReg), operandNR(x10VReg), operandImm12(0b111111111111, 0b0), false) + i.asALU(aluOpSubS, x9VReg, operandNR(x10VReg), operandImm12(0b111111111111, 0b0), false) }}, {want: "49fd3ff1", setup: func(i *instruction) { - i.asALU(aluOpSubS, operandNR(x9VReg), operandNR(x10VReg), operandImm12(0b111111111111, 0b0), true) + i.asALU(aluOpSubS, x9VReg, operandNR(x10VReg), operandImm12(0b111111111111, 0b0), true) }}, {want: "4020d41a", setup: func(i *instruction) { - i.asALU(aluOpLsl, operandNR(x0VReg), operandNR(x2VReg), operandNR(x20VReg), false) + i.asALU(aluOpLsl, x0VReg, operandNR(x2VReg), operandNR(x20VReg), false) }}, {want: "4020d49a", setup: func(i *instruction) { - i.asALU(aluOpLsl, operandNR(x0VReg), operandNR(x2VReg), operandNR(x20VReg), true) + i.asALU(aluOpLsl, x0VReg, operandNR(x2VReg), operandNR(x20VReg), true) }}, {want: "4024d41a", setup: func(i *instruction) { - i.asALU(aluOpLsr, operandNR(x0VReg), operandNR(x2VReg), operandNR(x20VReg), false) + i.asALU(aluOpLsr, x0VReg, operandNR(x2VReg), operandNR(x20VReg), false) }}, {want: "4024d49a", setup: func(i *instruction) { - i.asALU(aluOpLsr, operandNR(x0VReg), operandNR(x2VReg), operandNR(x20VReg), true) + i.asALU(aluOpLsr, x0VReg, operandNR(x2VReg), operandNR(x20VReg), true) }}, {want: "4028d41a", setup: func(i *instruction) { - i.asALU(aluOpAsr, operandNR(x0VReg), operandNR(x2VReg), operandNR(x20VReg), false) + i.asALU(aluOpAsr, x0VReg, operandNR(x2VReg), operandNR(x20VReg), false) }}, {want: "4028d49a", setup: func(i *instruction) { - i.asALU(aluOpAsr, operandNR(x0VReg), operandNR(x2VReg), operandNR(x20VReg), true) + i.asALU(aluOpAsr, x0VReg, operandNR(x2VReg), operandNR(x20VReg), true) }}, {want: "400cd49a", setup: 
func(i *instruction) { - i.asALU(aluOpSDiv, operandNR(x0VReg), operandNR(x2VReg), operandNR(x20VReg), true) + i.asALU(aluOpSDiv, x0VReg, operandNR(x2VReg), operandNR(x20VReg), true) }}, {want: "400cd41a", setup: func(i *instruction) { - i.asALU(aluOpSDiv, operandNR(x0VReg), operandNR(x2VReg), operandNR(x20VReg), false) + i.asALU(aluOpSDiv, x0VReg, operandNR(x2VReg), operandNR(x20VReg), false) }}, {want: "4008d49a", setup: func(i *instruction) { - i.asALU(aluOpUDiv, operandNR(x0VReg), operandNR(x2VReg), operandNR(x20VReg), true) + i.asALU(aluOpUDiv, x0VReg, operandNR(x2VReg), operandNR(x20VReg), true) }}, {want: "4008d41a", setup: func(i *instruction) { - i.asALU(aluOpUDiv, operandNR(x0VReg), operandNR(x2VReg), operandNR(x20VReg), false) + i.asALU(aluOpUDiv, x0VReg, operandNR(x2VReg), operandNR(x20VReg), false) }}, {want: "407c0013", setup: func(i *instruction) { - i.asALUShift(aluOpAsr, operandNR(x0VReg), operandNR(x2VReg), operandShiftImm(0), false) + i.asALUShift(aluOpAsr, x0VReg, operandNR(x2VReg), operandShiftImm(0), false) }}, {want: "40fc4093", setup: func(i *instruction) { - i.asALUShift(aluOpAsr, operandNR(x0VReg), operandNR(x2VReg), operandShiftImm(0), true) + i.asALUShift(aluOpAsr, x0VReg, operandNR(x2VReg), operandShiftImm(0), true) }}, {want: "407c0113", setup: func(i *instruction) { - i.asALUShift(aluOpAsr, operandNR(x0VReg), operandNR(x2VReg), operandShiftImm(1), false) + i.asALUShift(aluOpAsr, x0VReg, operandNR(x2VReg), operandShiftImm(1), false) }}, {want: "407c1f13", setup: func(i *instruction) { - i.asALUShift(aluOpAsr, operandNR(x0VReg), operandNR(x2VReg), operandShiftImm(31), false) + i.asALUShift(aluOpAsr, x0VReg, operandNR(x2VReg), operandShiftImm(31), false) }}, {want: "40fc4193", setup: func(i *instruction) { - i.asALUShift(aluOpAsr, operandNR(x0VReg), operandNR(x2VReg), operandShiftImm(1), true) + i.asALUShift(aluOpAsr, x0VReg, operandNR(x2VReg), operandShiftImm(1), true) }}, {want: "40fc5f93", setup: func(i *instruction) { - i.asALUShift(aluOpAsr, operandNR(x0VReg), operandNR(x2VReg), operandShiftImm(31), true) + i.asALUShift(aluOpAsr, x0VReg, operandNR(x2VReg), operandShiftImm(31), true) }}, {want: "40fc7f93", setup: func(i *instruction) { - i.asALUShift(aluOpAsr, operandNR(x0VReg), operandNR(x2VReg), operandShiftImm(63), true) + i.asALUShift(aluOpAsr, x0VReg, operandNR(x2VReg), operandShiftImm(63), true) }}, {want: "407c0153", setup: func(i *instruction) { - i.asALUShift(aluOpLsr, operandNR(x0VReg), operandNR(x2VReg), operandShiftImm(1), false) + i.asALUShift(aluOpLsr, x0VReg, operandNR(x2VReg), operandShiftImm(1), false) }}, {want: "407c1f53", setup: func(i *instruction) { - i.asALUShift(aluOpLsr, operandNR(x0VReg), operandNR(x2VReg), operandShiftImm(31), false) + i.asALUShift(aluOpLsr, x0VReg, operandNR(x2VReg), operandShiftImm(31), false) }}, {want: "40fc41d3", setup: func(i *instruction) { - i.asALUShift(aluOpLsr, operandNR(x0VReg), operandNR(x2VReg), operandShiftImm(1), true) + i.asALUShift(aluOpLsr, x0VReg, operandNR(x2VReg), operandShiftImm(1), true) }}, {want: "40fc5fd3", setup: func(i *instruction) { - i.asALUShift(aluOpLsr, operandNR(x0VReg), operandNR(x2VReg), operandShiftImm(31), true) + i.asALUShift(aluOpLsr, x0VReg, operandNR(x2VReg), operandShiftImm(31), true) }}, {want: "40fc7fd3", setup: func(i *instruction) { - i.asALUShift(aluOpLsr, operandNR(x0VReg), operandNR(x2VReg), operandShiftImm(63), true) + i.asALUShift(aluOpLsr, x0VReg, operandNR(x2VReg), operandShiftImm(63), true) }}, {want: "407c0053", setup: func(i *instruction) { - 
i.asALUShift(aluOpLsl, operandNR(x0VReg), operandNR(x2VReg), operandShiftImm(0), false) + i.asALUShift(aluOpLsl, x0VReg, operandNR(x2VReg), operandShiftImm(0), false) }}, {want: "40fc40d3", setup: func(i *instruction) { - i.asALUShift(aluOpLsl, operandNR(x0VReg), operandNR(x2VReg), operandShiftImm(0), true) + i.asALUShift(aluOpLsl, x0VReg, operandNR(x2VReg), operandShiftImm(0), true) }}, {want: "40781f53", setup: func(i *instruction) { - i.asALUShift(aluOpLsl, operandNR(x0VReg), operandNR(x2VReg), operandShiftImm(1), false) + i.asALUShift(aluOpLsl, x0VReg, operandNR(x2VReg), operandShiftImm(1), false) }}, {want: "40000153", setup: func(i *instruction) { - i.asALUShift(aluOpLsl, operandNR(x0VReg), operandNR(x2VReg), operandShiftImm(31), false) + i.asALUShift(aluOpLsl, x0VReg, operandNR(x2VReg), operandShiftImm(31), false) }}, {want: "40f87fd3", setup: func(i *instruction) { - i.asALUShift(aluOpLsl, operandNR(x0VReg), operandNR(x2VReg), operandShiftImm(1), true) + i.asALUShift(aluOpLsl, x0VReg, operandNR(x2VReg), operandShiftImm(1), true) }}, {want: "408061d3", setup: func(i *instruction) { - i.asALUShift(aluOpLsl, operandNR(x0VReg), operandNR(x2VReg), operandShiftImm(31), true) + i.asALUShift(aluOpLsl, x0VReg, operandNR(x2VReg), operandShiftImm(31), true) }}, {want: "400041d3", setup: func(i *instruction) { - i.asALUShift(aluOpLsl, operandNR(x0VReg), operandNR(x2VReg), operandShiftImm(63), true) + i.asALUShift(aluOpLsl, x0VReg, operandNR(x2VReg), operandShiftImm(63), true) }}, {want: "4000c05a", setup: func(i *instruction) { i.asBitRR(bitOpRbit, x0VReg, x2VReg, false) }}, {want: "4000c0da", setup: func(i *instruction) { i.asBitRR(bitOpRbit, x0VReg, x2VReg, true) }}, {want: "4010c05a", setup: func(i *instruction) { i.asBitRR(bitOpClz, x0VReg, x2VReg, false) }}, {want: "4010c0da", setup: func(i *instruction) { i.asBitRR(bitOpClz, x0VReg, x2VReg, true) }}, {want: "4138302e", setup: func(i *instruction) { - i.asVecLanes(vecOpUaddlv, operandNR(v1VReg), operandNR(v2VReg), vecArrangement8B) + i.asVecLanes(vecOpUaddlv, v1VReg, operandNR(v2VReg), vecArrangement8B) }}, {want: "4138306e", setup: func(i *instruction) { - i.asVecLanes(vecOpUaddlv, operandNR(v1VReg), operandNR(v2VReg), vecArrangement16B) + i.asVecLanes(vecOpUaddlv, v1VReg, operandNR(v2VReg), vecArrangement16B) }}, {want: "4138702e", setup: func(i *instruction) { - i.asVecLanes(vecOpUaddlv, operandNR(v1VReg), operandNR(v2VReg), vecArrangement4H) + i.asVecLanes(vecOpUaddlv, v1VReg, operandNR(v2VReg), vecArrangement4H) }}, {want: "4138706e", setup: func(i *instruction) { - i.asVecLanes(vecOpUaddlv, operandNR(v1VReg), operandNR(v2VReg), vecArrangement8H) + i.asVecLanes(vecOpUaddlv, v1VReg, operandNR(v2VReg), vecArrangement8H) }}, {want: "4138b06e", setup: func(i *instruction) { - i.asVecLanes(vecOpUaddlv, operandNR(v1VReg), operandNR(v2VReg), vecArrangement4S) + i.asVecLanes(vecOpUaddlv, v1VReg, operandNR(v2VReg), vecArrangement4S) }}, {want: "41c0230e", setup: func(i *instruction) { - i.asVecRRR(vecOpSmull, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) + i.asVecRRR(vecOpSmull, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) }}, {want: "41c0630e", setup: func(i *instruction) { - i.asVecRRR(vecOpSmull, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) + i.asVecRRR(vecOpSmull, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) }}, {want: "41c0a30e", setup: func(i *instruction) { - i.asVecRRR(vecOpSmull, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), 
vecArrangement4S) + i.asVecRRR(vecOpSmull, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) }}, {want: "41c0234e", setup: func(i *instruction) { - i.asVecRRR(vecOpSmull2, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) + i.asVecRRR(vecOpSmull2, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) }}, {want: "41c0634e", setup: func(i *instruction) { - i.asVecRRR(vecOpSmull2, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) + i.asVecRRR(vecOpSmull2, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) }}, {want: "41c0a34e", setup: func(i *instruction) { - i.asVecRRR(vecOpSmull2, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) + i.asVecRRR(vecOpSmull2, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4S) }}, {want: "411c630e", setup: func(i *instruction) { - i.asVecRRR(vecOpBic, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) + i.asVecRRR(vecOpBic, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) }}, {want: "411c634e", setup: func(i *instruction) { - i.asVecRRR(vecOpBic, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) + i.asVecRRR(vecOpBic, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) }}, {want: "411c632e", setup: func(i *instruction) { - i.asVecRRRRewrite(vecOpBsl, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) + i.asVecRRRRewrite(vecOpBsl, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) }}, {want: "411c636e", setup: func(i *instruction) { - i.asVecRRRRewrite(vecOpBsl, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) + i.asVecRRRRewrite(vecOpBsl, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) }}, {want: "4158202e", setup: func(i *instruction) { - i.asVecMisc(vecOpNot, operandNR(v1VReg), operandNR(v2VReg), vecArrangement8B) + i.asVecMisc(vecOpNot, v1VReg, operandNR(v2VReg), vecArrangement8B) }}, {want: "4158206e", setup: func(i *instruction) { - i.asVecMisc(vecOpNot, operandNR(v1VReg), operandNR(v2VReg), vecArrangement16B) + i.asVecMisc(vecOpNot, v1VReg, operandNR(v2VReg), vecArrangement16B) }}, {want: "411c230e", setup: func(i *instruction) { - i.asVecRRR(vecOpAnd, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) + i.asVecRRR(vecOpAnd, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) }}, {want: "411c234e", setup: func(i *instruction) { - i.asVecRRR(vecOpAnd, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) + i.asVecRRR(vecOpAnd, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) }}, {want: "411ca30e", setup: func(i *instruction) { - i.asVecRRR(vecOpOrr, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) + i.asVecRRR(vecOpOrr, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) }}, {want: "411ca34e", setup: func(i *instruction) { - i.asVecRRR(vecOpOrr, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) + i.asVecRRR(vecOpOrr, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) }}, {want: "4144230e", setup: func(i *instruction) { - i.asVecRRR(vecOpSshl, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) + i.asVecRRR(vecOpSshl, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) }}, {want: "4144234e", setup: func(i *instruction) { - i.asVecRRR(vecOpSshl, operandNR(v1VReg), operandNR(v2VReg), 
operandNR(v3VReg), vecArrangement16B) + i.asVecRRR(vecOpSshl, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) }}, {want: "4144630e", setup: func(i *instruction) { - i.asVecRRR(vecOpSshl, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) + i.asVecRRR(vecOpSshl, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) }}, {want: "4144634e", setup: func(i *instruction) { - i.asVecRRR(vecOpSshl, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) + i.asVecRRR(vecOpSshl, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) }}, {want: "4144a30e", setup: func(i *instruction) { - i.asVecRRR(vecOpSshl, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpSshl, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, {want: "4144e34e", setup: func(i *instruction) { - i.asVecRRR(vecOpSshl, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) + i.asVecRRR(vecOpSshl, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) }}, {want: "4144a30e", setup: func(i *instruction) { - i.asVecRRR(vecOpSshl, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpSshl, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, {want: "4144232e", setup: func(i *instruction) { - i.asVecRRR(vecOpUshl, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) + i.asVecRRR(vecOpUshl, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B) }}, {want: "4144236e", setup: func(i *instruction) { - i.asVecRRR(vecOpUshl, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) + i.asVecRRR(vecOpUshl, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B) }}, {want: "4144632e", setup: func(i *instruction) { - i.asVecRRR(vecOpUshl, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) + i.asVecRRR(vecOpUshl, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement4H) }}, {want: "4144636e", setup: func(i *instruction) { - i.asVecRRR(vecOpUshl, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) + i.asVecRRR(vecOpUshl, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement8H) }}, {want: "4144a32e", setup: func(i *instruction) { - i.asVecRRR(vecOpUshl, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpUshl, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, {want: "4144e36e", setup: func(i *instruction) { - i.asVecRRR(vecOpUshl, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) + i.asVecRRR(vecOpUshl, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D) }}, {want: "4144a30e", setup: func(i *instruction) { - i.asVecRRR(vecOpSshl, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) + i.asVecRRR(vecOpSshl, v1VReg, operandNR(v2VReg), operandNR(v3VReg), vecArrangement2S) }}, - {want: "4158200e", setup: func(i *instruction) { i.asVecMisc(vecOpCnt, operandNR(v1VReg), operandNR(v2VReg), vecArrangement8B) }}, - {want: "4158204e", setup: func(i *instruction) { i.asVecMisc(vecOpCnt, operandNR(v1VReg), operandNR(v2VReg), vecArrangement16B) }}, + {want: "4158200e", setup: func(i *instruction) { i.asVecMisc(vecOpCnt, v1VReg, operandNR(v2VReg), vecArrangement8B) }}, + {want: "4158204e", setup: func(i *instruction) { i.asVecMisc(vecOpCnt, v1VReg, operandNR(v2VReg), vecArrangement16B) }}, {want: "41c0221e", setup: 
func(i *instruction) { - i.asFpuRR(fpuUniOpCvt32To64, operandNR(v1VReg), operandNR(v2VReg), true) + i.asFpuRR(fpuUniOpCvt32To64, v1VReg, operandNR(v2VReg), true) }}, {want: "4140621e", setup: func(i *instruction) { - i.asFpuRR(fpuUniOpCvt64To32, operandNR(v1VReg), operandNR(v2VReg), true) + i.asFpuRR(fpuUniOpCvt64To32, v1VReg, operandNR(v2VReg), true) }}, {want: "4140211e", setup: func(i *instruction) { - i.asFpuRR(fpuUniOpNeg, operandNR(v1VReg), operandNR(v2VReg), false) + i.asFpuRR(fpuUniOpNeg, v1VReg, operandNR(v2VReg), false) }}, {want: "41c0211e", setup: func(i *instruction) { - i.asFpuRR(fpuUniOpSqrt, operandNR(v1VReg), operandNR(v2VReg), false) + i.asFpuRR(fpuUniOpSqrt, v1VReg, operandNR(v2VReg), false) }}, {want: "41c0611e", setup: func(i *instruction) { - i.asFpuRR(fpuUniOpSqrt, operandNR(v1VReg), operandNR(v2VReg), true) + i.asFpuRR(fpuUniOpSqrt, v1VReg, operandNR(v2VReg), true) }}, {want: "41c0241e", setup: func(i *instruction) { - i.asFpuRR(fpuUniOpRoundPlus, operandNR(v1VReg), operandNR(v2VReg), false) + i.asFpuRR(fpuUniOpRoundPlus, v1VReg, operandNR(v2VReg), false) }}, {want: "41c0641e", setup: func(i *instruction) { - i.asFpuRR(fpuUniOpRoundPlus, operandNR(v1VReg), operandNR(v2VReg), true) + i.asFpuRR(fpuUniOpRoundPlus, v1VReg, operandNR(v2VReg), true) }}, {want: "4140251e", setup: func(i *instruction) { - i.asFpuRR(fpuUniOpRoundMinus, operandNR(v1VReg), operandNR(v2VReg), false) + i.asFpuRR(fpuUniOpRoundMinus, v1VReg, operandNR(v2VReg), false) }}, {want: "4140651e", setup: func(i *instruction) { - i.asFpuRR(fpuUniOpRoundMinus, operandNR(v1VReg), operandNR(v2VReg), true) + i.asFpuRR(fpuUniOpRoundMinus, v1VReg, operandNR(v2VReg), true) }}, {want: "41c0251e", setup: func(i *instruction) { - i.asFpuRR(fpuUniOpRoundZero, operandNR(v1VReg), operandNR(v2VReg), false) + i.asFpuRR(fpuUniOpRoundZero, v1VReg, operandNR(v2VReg), false) }}, {want: "41c0651e", setup: func(i *instruction) { - i.asFpuRR(fpuUniOpRoundZero, operandNR(v1VReg), operandNR(v2VReg), true) + i.asFpuRR(fpuUniOpRoundZero, v1VReg, operandNR(v2VReg), true) }}, {want: "4140241e", setup: func(i *instruction) { - i.asFpuRR(fpuUniOpRoundNearest, operandNR(v1VReg), operandNR(v2VReg), false) + i.asFpuRR(fpuUniOpRoundNearest, v1VReg, operandNR(v2VReg), false) }}, {want: "4140641e", setup: func(i *instruction) { - i.asFpuRR(fpuUniOpRoundNearest, operandNR(v1VReg), operandNR(v2VReg), true) + i.asFpuRR(fpuUniOpRoundNearest, v1VReg, operandNR(v2VReg), true) }}, - {want: "4140611e", setup: func(i *instruction) { i.asFpuRR(fpuUniOpNeg, operandNR(v1VReg), operandNR(v2VReg), true) }}, - {want: "41c0404d", setup: func(i *instruction) { i.asVecLoad1R(operandNR(v1VReg), operandNR(x2VReg), vecArrangement16B) }}, - {want: "41c4404d", setup: func(i *instruction) { i.asVecLoad1R(operandNR(v1VReg), operandNR(x2VReg), vecArrangement8H) }}, - {want: "41c8404d", setup: func(i *instruction) { i.asVecLoad1R(operandNR(v1VReg), operandNR(x2VReg), vecArrangement4S) }}, - {want: "41cc404d", setup: func(i *instruction) { i.asVecLoad1R(operandNR(v1VReg), operandNR(x2VReg), vecArrangement2D) }}, + {want: "4140611e", setup: func(i *instruction) { i.asFpuRR(fpuUniOpNeg, v1VReg, operandNR(v2VReg), true) }}, + {want: "41c0404d", setup: func(i *instruction) { i.asVecLoad1R(v1VReg, operandNR(x2VReg), vecArrangement16B) }}, + {want: "41c4404d", setup: func(i *instruction) { i.asVecLoad1R(v1VReg, operandNR(x2VReg), vecArrangement8H) }}, + {want: "41c8404d", setup: func(i *instruction) { i.asVecLoad1R(v1VReg, operandNR(x2VReg), vecArrangement4S) }}, + {want: 
"41cc404d", setup: func(i *instruction) { i.asVecLoad1R(v1VReg, operandNR(x2VReg), vecArrangement2D) }}, {want: "0200e1b8", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpAdd, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 4) + i.asAtomicRmw(atomicRmwOpAdd, x0VReg, x1VReg, x2VReg, 4) }}, {want: "0200e178", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpAdd, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 2) + i.asAtomicRmw(atomicRmwOpAdd, x0VReg, x1VReg, x2VReg, 2) }}, {want: "0200e138", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpAdd, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 1) + i.asAtomicRmw(atomicRmwOpAdd, x0VReg, x1VReg, x2VReg, 1) }}, {want: "0200e1f8", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpAdd, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 8) + i.asAtomicRmw(atomicRmwOpAdd, x0VReg, x1VReg, x2VReg, 8) }}, {want: "0200e1b8", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpAdd, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 4) + i.asAtomicRmw(atomicRmwOpAdd, x0VReg, x1VReg, x2VReg, 4) }}, {want: "0200e178", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpAdd, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 2) + i.asAtomicRmw(atomicRmwOpAdd, x0VReg, x1VReg, x2VReg, 2) }}, {want: "0200e138", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpAdd, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 1) + i.asAtomicRmw(atomicRmwOpAdd, x0VReg, x1VReg, x2VReg, 1) }}, {want: "0210e1b8", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpClr, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 4) + i.asAtomicRmw(atomicRmwOpClr, x0VReg, x1VReg, x2VReg, 4) }}, {want: "0210e178", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpClr, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 2) + i.asAtomicRmw(atomicRmwOpClr, x0VReg, x1VReg, x2VReg, 2) }}, {want: "0210e138", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpClr, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 1) + i.asAtomicRmw(atomicRmwOpClr, x0VReg, x1VReg, x2VReg, 1) }}, {want: "0210e1f8", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpClr, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 8) + i.asAtomicRmw(atomicRmwOpClr, x0VReg, x1VReg, x2VReg, 8) }}, {want: "0210e1b8", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpClr, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 4) + i.asAtomicRmw(atomicRmwOpClr, x0VReg, x1VReg, x2VReg, 4) }}, {want: "0210e178", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpClr, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 2) + i.asAtomicRmw(atomicRmwOpClr, x0VReg, x1VReg, x2VReg, 2) }}, {want: "0210e138", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpClr, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 1) + i.asAtomicRmw(atomicRmwOpClr, x0VReg, x1VReg, x2VReg, 1) }}, {want: "0230e1b8", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpSet, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 4) + i.asAtomicRmw(atomicRmwOpSet, x0VReg, x1VReg, x2VReg, 4) }}, {want: "0230e178", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpSet, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 2) + i.asAtomicRmw(atomicRmwOpSet, x0VReg, x1VReg, x2VReg, 2) }}, {want: "0230e138", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpSet, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 1) + i.asAtomicRmw(atomicRmwOpSet, x0VReg, 
x1VReg, x2VReg, 1) }}, {want: "0230e1f8", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpSet, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 8) + i.asAtomicRmw(atomicRmwOpSet, x0VReg, x1VReg, x2VReg, 8) }}, {want: "0230e1b8", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpSet, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 4) + i.asAtomicRmw(atomicRmwOpSet, x0VReg, x1VReg, x2VReg, 4) }}, {want: "0230e178", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpSet, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 2) + i.asAtomicRmw(atomicRmwOpSet, x0VReg, x1VReg, x2VReg, 2) }}, {want: "0230e138", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpSet, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 1) + i.asAtomicRmw(atomicRmwOpSet, x0VReg, x1VReg, x2VReg, 1) }}, {want: "0220e1b8", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpEor, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 4) + i.asAtomicRmw(atomicRmwOpEor, x0VReg, x1VReg, x2VReg, 4) }}, {want: "0220e178", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpEor, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 2) + i.asAtomicRmw(atomicRmwOpEor, x0VReg, x1VReg, x2VReg, 2) }}, {want: "0220e138", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpEor, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 1) + i.asAtomicRmw(atomicRmwOpEor, x0VReg, x1VReg, x2VReg, 1) }}, {want: "0220e1f8", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpEor, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 8) + i.asAtomicRmw(atomicRmwOpEor, x0VReg, x1VReg, x2VReg, 8) }}, {want: "0220e1b8", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpEor, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 4) + i.asAtomicRmw(atomicRmwOpEor, x0VReg, x1VReg, x2VReg, 4) }}, {want: "0220e178", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpEor, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 2) + i.asAtomicRmw(atomicRmwOpEor, x0VReg, x1VReg, x2VReg, 2) }}, {want: "0220e138", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpEor, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 1) + i.asAtomicRmw(atomicRmwOpEor, x0VReg, x1VReg, x2VReg, 1) }}, {want: "0280e1b8", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpSwp, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 4) + i.asAtomicRmw(atomicRmwOpSwp, x0VReg, x1VReg, x2VReg, 4) }}, {want: "0280e178", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpSwp, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 2) + i.asAtomicRmw(atomicRmwOpSwp, x0VReg, x1VReg, x2VReg, 2) }}, {want: "0280e138", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpSwp, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 1) + i.asAtomicRmw(atomicRmwOpSwp, x0VReg, x1VReg, x2VReg, 1) }}, {want: "0280e1f8", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpSwp, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 8) + i.asAtomicRmw(atomicRmwOpSwp, x0VReg, x1VReg, x2VReg, 8) }}, {want: "0280e1b8", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpSwp, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 4) + i.asAtomicRmw(atomicRmwOpSwp, x0VReg, x1VReg, x2VReg, 4) }}, {want: "0280e178", setup: func(i *instruction) { - i.asAtomicRmw(atomicRmwOpSwp, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 2) + i.asAtomicRmw(atomicRmwOpSwp, x0VReg, x1VReg, x2VReg, 2) }}, {want: "0280e138", setup: func(i *instruction) { - 
i.asAtomicRmw(atomicRmwOpSwp, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 1) + i.asAtomicRmw(atomicRmwOpSwp, x0VReg, x1VReg, x2VReg, 1) }}, {want: "02fce188", setup: func(i *instruction) { - i.asAtomicCas(operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 4) + i.asAtomicCas(x0VReg, x1VReg, x2VReg, 4) }}, {want: "02fce148", setup: func(i *instruction) { - i.asAtomicCas(operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 2) + i.asAtomicCas(x0VReg, x1VReg, x2VReg, 2) }}, {want: "02fce108", setup: func(i *instruction) { - i.asAtomicCas(operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 1) + i.asAtomicCas(x0VReg, x1VReg, x2VReg, 1) }}, {want: "02fce1c8", setup: func(i *instruction) { - i.asAtomicCas(operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 8) + i.asAtomicCas(x0VReg, x1VReg, x2VReg, 8) }}, {want: "02fce188", setup: func(i *instruction) { - i.asAtomicCas(operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 4) + i.asAtomicCas(x0VReg, x1VReg, x2VReg, 4) }}, {want: "02fce148", setup: func(i *instruction) { - i.asAtomicCas(operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 2) + i.asAtomicCas(x0VReg, x1VReg, x2VReg, 2) }}, {want: "02fce108", setup: func(i *instruction) { - i.asAtomicCas(operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), 1) + i.asAtomicCas(x0VReg, x1VReg, x2VReg, 1) }}, {want: "01fcdf88", setup: func(i *instruction) { - i.asAtomicLoad(operandNR(x0VReg), operandNR(x1VReg), 4) + i.asAtomicLoad(x0VReg, x1VReg, 4) }}, {want: "01fcdf48", setup: func(i *instruction) { - i.asAtomicLoad(operandNR(x0VReg), operandNR(x1VReg), 2) + i.asAtomicLoad(x0VReg, x1VReg, 2) }}, {want: "01fcdf08", setup: func(i *instruction) { - i.asAtomicLoad(operandNR(x0VReg), operandNR(x1VReg), 1) + i.asAtomicLoad(x0VReg, x1VReg, 1) }}, {want: "01fcdfc8", setup: func(i *instruction) { - i.asAtomicLoad(operandNR(x0VReg), operandNR(x1VReg), 8) + i.asAtomicLoad(x0VReg, x1VReg, 8) }}, {want: "01fcdf88", setup: func(i *instruction) { - i.asAtomicLoad(operandNR(x0VReg), operandNR(x1VReg), 4) + i.asAtomicLoad(x0VReg, x1VReg, 4) }}, {want: "01fcdf48", setup: func(i *instruction) { - i.asAtomicLoad(operandNR(x0VReg), operandNR(x1VReg), 2) + i.asAtomicLoad(x0VReg, x1VReg, 2) }}, {want: "01fcdf08", setup: func(i *instruction) { - i.asAtomicLoad(operandNR(x0VReg), operandNR(x1VReg), 1) + i.asAtomicLoad(x0VReg, x1VReg, 1) }}, {want: "01fc9f88", setup: func(i *instruction) { i.asAtomicStore(operandNR(x0VReg), operandNR(x1VReg), 4) @@ -1823,7 +1824,7 @@ func TestInstruction_encode(t *testing.T) { for _, dst64bit := range trueFalse { i := &instruction{prev: cur} cur.next = i - i.asIntToFpu(operandNR(v2VReg), operandNR(x10VReg), rnSigned, src64bit, dst64bit) + i.asIntToFpu(v2VReg, operandNR(x10VReg), rnSigned, src64bit, dst64bit) cur = i } } @@ -1838,7 +1839,7 @@ func TestInstruction_encode(t *testing.T) { for _, dst64bit := range trueFalse { i := &instruction{prev: cur} cur.next = i - i.asFpuToInt(operandNR(v2VReg), operandNR(x10VReg), rnSigned, src64bit, dst64bit) + i.asFpuToInt(v2VReg, operandNR(x10VReg), rnSigned, src64bit, dst64bit) cur = i } } @@ -2232,12 +2233,13 @@ func TestInstruction_encoding_store_encoding(t *testing.T) { var i *instruction switch tc.k { case store8, store16, store32, store64, fpuStore32, fpuStore64, fpuStore128: - i = &instruction{kind: tc.k, amode: tc.amode, rn: operandNR(tc.rn)} + i = &instruction{kind: tc.k, rn: operandNR(tc.rn)} case uLoad8, uLoad16, uLoad32, uLoad64, sLoad8, sLoad16, sLoad32, fpuLoad32, fpuLoad64, fpuLoad128: - i = 
&instruction{kind: tc.k, amode: tc.amode, rd: operandNR(tc.rn)} + i = &instruction{kind: tc.k, rd: tc.rn} default: t.Fatalf("unknown kind: %v", tc.k) } + i.setAmode(&tc.amode) _, _, m := newSetupWithMockContext() i.encode(m) // Note: for quick iteration we can use golang.org/x/arch package to verify the encoding. diff --git a/internal/engine/wazevo/backend/isa/arm64/instr_test.go b/internal/engine/wazevo/backend/isa/arm64/instr_test.go index 6e4ec7ce2c..3b5b964eaf 100644 --- a/internal/engine/wazevo/backend/isa/arm64/instr_test.go +++ b/internal/engine/wazevo/backend/isa/arm64/instr_test.go @@ -49,7 +49,7 @@ func TestInstruction_String(t *testing.T) { i: &instruction{ kind: loadFpuConst32, u1: uint64(math.Float32bits(3.0)), - rd: operandNR(regalloc.VReg(0).SetRegType(regalloc.RegTypeFloat)), + rd: regalloc.VReg(0).SetRegType(regalloc.RegTypeFloat), }, exp: "ldr s0?, #8; b 8; data.f32 3.000000", }, @@ -57,7 +57,7 @@ func TestInstruction_String(t *testing.T) { i: &instruction{ kind: loadFpuConst64, u1: math.Float64bits(12345.987491), - rd: operandNR(regalloc.VReg(0).SetRegType(regalloc.RegTypeFloat)), + rd: regalloc.VReg(0).SetRegType(regalloc.RegTypeFloat), }, exp: "ldr d0?, #8; b 16; data.f64 12345.987491", }, diff --git a/internal/engine/wazevo/backend/isa/arm64/lower_constant.go b/internal/engine/wazevo/backend/isa/arm64/lower_constant.go index 698b382d46..6c6824fb0a 100644 --- a/internal/engine/wazevo/backend/isa/arm64/lower_constant.go +++ b/internal/engine/wazevo/backend/isa/arm64/lower_constant.go @@ -284,18 +284,18 @@ func (m *machine) load64bitConst(c int64, dst regalloc.VReg) { func (m *machine) insertMOVZ(dst regalloc.VReg, v uint64, shift int, dst64 bool) { instr := m.allocateInstr() - instr.asMOVZ(dst, v, uint64(shift), dst64) + instr.asMOVZ(dst, v, uint32(shift), dst64) m.insert(instr) } func (m *machine) insertMOVK(dst regalloc.VReg, v uint64, shift int, dst64 bool) { instr := m.allocateInstr() - instr.asMOVK(dst, v, uint64(shift), dst64) + instr.asMOVK(dst, v, uint32(shift), dst64) m.insert(instr) } func (m *machine) insertMOVN(dst regalloc.VReg, v uint64, shift int, dst64 bool) { instr := m.allocateInstr() - instr.asMOVN(dst, v, uint64(shift), dst64) + instr.asMOVN(dst, v, uint32(shift), dst64) m.insert(instr) } diff --git a/internal/engine/wazevo/backend/isa/arm64/lower_instr.go b/internal/engine/wazevo/backend/isa/arm64/lower_instr.go index 2bb234e8c1..048bf32040 100644 --- a/internal/engine/wazevo/backend/isa/arm64/lower_instr.go +++ b/internal/engine/wazevo/backend/isa/arm64/lower_instr.go @@ -52,11 +52,11 @@ func (m *machine) lowerBrTable(i *ssa.Instruction) { maxIndexReg := m.compiler.AllocateVReg(ssa.TypeI32) m.lowerConstantI32(maxIndexReg, int32(len(targets)-1)) subs := m.allocateInstr() - subs.asALU(aluOpSubS, operandNR(xzrVReg), indexOperand, operandNR(maxIndexReg), false) + subs.asALU(aluOpSubS, xzrVReg, indexOperand, operandNR(maxIndexReg), false) m.insert(subs) csel := m.allocateInstr() adjustedIndex := m.compiler.AllocateVReg(ssa.TypeI32) - csel.asCSel(operandNR(adjustedIndex), operandNR(maxIndexReg), indexOperand, hs, false) + csel.asCSel(adjustedIndex, operandNR(maxIndexReg), indexOperand, hs, false) m.insert(csel) brSequence := m.allocateInstr() @@ -249,7 +249,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { rc := m.getOperand_NR(m.compiler.ValueDefinition(c), extModeNone) rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := 
operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) m.lowerSelectVec(rc, rn, rm, rd) } else { m.lowerSelect(c, x, y, instr.Return()) @@ -270,7 +270,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { x, ctx := instr.Arg2() result := instr.Return() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(result)) + rd := m.compiler.VRegOf(result) ctxVReg := m.compiler.VRegOf(ctx) m.lowerFpuToInt(rd, rn, ctxVReg, true, x.Type() == ssa.TypeF64, result.Type().Bits() == 64, op == ssa.OpcodeFcvtToSintSat) @@ -278,7 +278,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { x, ctx := instr.Arg2() result := instr.Return() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(result)) + rd := m.compiler.VRegOf(result) ctxVReg := m.compiler.VRegOf(ctx) m.lowerFpuToInt(rd, rn, ctxVReg, false, x.Type() == ssa.TypeF64, result.Type().Bits() == 64, op == ssa.OpcodeFcvtToUintSat) @@ -286,25 +286,25 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { x := instr.Arg() result := instr.Return() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(result)) + rd := m.compiler.VRegOf(result) m.lowerIntToFpu(rd, rn, true, x.Type() == ssa.TypeI64, result.Type().Bits() == 64) case ssa.OpcodeFcvtFromUint: x := instr.Arg() result := instr.Return() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(result)) + rd := m.compiler.VRegOf(result) m.lowerIntToFpu(rd, rn, false, x.Type() == ssa.TypeI64, result.Type().Bits() == 64) case ssa.OpcodeFdemote: v := instr.Arg() rn := m.getOperand_NR(m.compiler.ValueDefinition(v), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) cnt := m.allocateInstr() cnt.asFpuRR(fpuUniOpCvt64To32, rd, rn, false) m.insert(cnt) case ssa.OpcodeFpromote: v := instr.Arg() rn := m.getOperand_NR(m.compiler.ValueDefinition(v), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) cnt := m.allocateInstr() cnt.asFpuRR(fpuUniOpCvt32To64, rd, rn, true) m.insert(cnt) @@ -343,15 +343,15 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { ctxVReg := m.compiler.VRegOf(ctx) rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) m.lowerIDiv(ctxVReg, rd, rn, rm, x.Type() == ssa.TypeI64, op == ssa.OpcodeSdiv) case ssa.OpcodeSrem, ssa.OpcodeUrem: x, y, ctx := instr.Arg3() ctxVReg := m.compiler.VRegOf(ctx) rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) - m.lowerIRem(ctxVReg, rd, rn, rm, x.Type() == ssa.TypeI64, op == ssa.OpcodeSrem) + rd := m.compiler.VRegOf(instr.Return()) + m.lowerIRem(ctxVReg, rd, rn.nr(), rm, x.Type() == ssa.TypeI64, op == ssa.OpcodeSrem) case ssa.OpcodeVconst: result := m.compiler.VRegOf(instr.Return()) lo, hi := instr.VconstData() @@ -362,7 +362,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { x := instr.Arg() ins := m.allocateInstr() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) ins.asVecMisc(vecOpNot, rd, 
rn, vecArrangement16B) m.insert(ins) case ssa.OpcodeVbxor: @@ -382,12 +382,12 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) creg := m.getOperand_NR(m.compiler.ValueDefinition(c), extModeNone) - tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) + tmp := m.compiler.AllocateVReg(ssa.TypeV128) // creg is overwritten by BSL, so we need to move it to the result register before the instruction // in case when it is used somewhere else. mov := m.allocateInstr() - mov.asFpuMov128(tmp.nr(), creg.nr()) + mov.asFpuMov128(tmp, creg.nr()) m.insert(mov) ins := m.allocateInstr() @@ -396,7 +396,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { mov2 := m.allocateInstr() rd := m.compiler.VRegOf(instr.Return()) - mov2.asFpuMov128(rd, tmp.nr()) + mov2.asFpuMov128(rd, tmp) m.insert(mov2) case ssa.OpcodeVanyTrue, ssa.OpcodeVallTrue: x, lane := instr.ArgWithLane() @@ -405,12 +405,12 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { arr = ssaLaneToArrangement(lane) } rm := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) m.lowerVcheckTrue(op, rm, rd, arr) case ssa.OpcodeVhighBits: x, lane := instr.ArgWithLane() rm := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) arr := ssaLaneToArrangement(lane) m.lowerVhighBits(rm, rd, arr) case ssa.OpcodeVIadd: @@ -441,9 +441,9 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { panic("unsupported lane " + lane.String()) } - widenLo := m.allocateInstr().asVecShiftImm(widen, tmpLo, vv, operandShiftImm(0), loArr) - widenHi := m.allocateInstr().asVecShiftImm(widen, tmpHi, vv, operandShiftImm(0), hiArr) - addp := m.allocateInstr().asVecRRR(vecOpAddp, operandNR(m.compiler.VRegOf(instr.Return())), tmpLo, tmpHi, dstArr) + widenLo := m.allocateInstr().asVecShiftImm(widen, tmpLo.nr(), vv, operandShiftImm(0), loArr) + widenHi := m.allocateInstr().asVecShiftImm(widen, tmpHi.nr(), vv, operandShiftImm(0), hiArr) + addp := m.allocateInstr().asVecRRR(vecOpAddp, m.compiler.VRegOf(instr.Return()), tmpLo, tmpHi, dstArr) m.insert(widenLo) m.insert(widenHi) m.insert(addp) @@ -493,7 +493,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { arr := ssaLaneToArrangement(lane) rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) m.lowerVIMul(rd, rn, rm, arr) case ssa.OpcodeVIabs: m.lowerVecMisc(vecOpAbs, instr) @@ -507,7 +507,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { arr := ssaLaneToArrangement(lane) rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) m.lowerVShift(op, rd, rn, rm, arr) case ssa.OpcodeVSqrt: m.lowerVecMisc(vecOpFsqrt, instr) @@ -547,18 +547,18 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { x, lane := instr.ArgWithLane() arr := ssaLaneToArrangement(lane) rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) 
m.lowerVfpuToInt(rd, rn, arr, op == ssa.OpcodeVFcvtToSintSat) case ssa.OpcodeVFcvtFromSint, ssa.OpcodeVFcvtFromUint: x, lane := instr.ArgWithLane() arr := ssaLaneToArrangement(lane) rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) m.lowerVfpuFromInt(rd, rn, arr, op == ssa.OpcodeVFcvtFromSint) case ssa.OpcodeSwidenLow, ssa.OpcodeUwidenLow: x, lane := instr.ArgWithLane() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) var arr vecArrangement switch lane { @@ -580,7 +580,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { case ssa.OpcodeSwidenHigh, ssa.OpcodeUwidenHigh: x, lane := instr.ArgWithLane() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) arr := ssaLaneToArrangement(lane) @@ -607,9 +607,9 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { } rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) - tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) + tmp := m.compiler.AllocateVReg(ssa.TypeV128) loQxtn := m.allocateInstr() hiQxtn := m.allocateInstr() @@ -628,7 +628,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { m.insert(hiQxtn) mov := m.allocateInstr() - mov.asFpuMov128(rd.nr(), tmp.nr()) + mov.asFpuMov128(rd, tmp) m.insert(mov) case ssa.OpcodeFvpromoteLow: x, lane := instr.ArgWithLane() @@ -637,7 +637,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { } ins := m.allocateInstr() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) ins.asVecMisc(vecOpFcvtl, rd, rn, vecArrangement2S) m.insert(ins) case ssa.OpcodeFvdemote: @@ -647,14 +647,14 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { } ins := m.allocateInstr() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) ins.asVecMisc(vecOpFcvtn, rd, rn, vecArrangement2S) m.insert(ins) case ssa.OpcodeExtractlane: x, index, signed, lane := instr.ExtractlaneData() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) mov := m.allocateInstr() switch lane { @@ -680,12 +680,12 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { x, y, index, lane := instr.InsertlaneData() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) - tmpReg := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) + rd := m.compiler.VRegOf(instr.Return()) + tmpReg := m.compiler.AllocateVReg(ssa.TypeV128) // Initially mov rn to tmp. mov1 := m.allocateInstr() - mov1.asFpuMov128(tmpReg.nr(), rn.nr()) + mov1.asFpuMov128(tmpReg, rn.nr()) m.insert(mov1) // movToVec and vecMovElement do not clear the remaining bits to zero, @@ -709,14 +709,14 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { // Finally mov tmp to rd. 
mov3 := m.allocateInstr() - mov3.asFpuMov128(rd.nr(), tmpReg.nr()) + mov3.asFpuMov128(rd, tmpReg) m.insert(mov3) case ssa.OpcodeSwizzle: x, y, lane := instr.Arg2WithLane() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) arr := ssaLaneToArrangement(lane) @@ -729,14 +729,14 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { x, y, lane1, lane2 := instr.ShuffleData() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) m.lowerShuffle(rd, rn, rm, lane1, lane2) case ssa.OpcodeSplat: x, lane := instr.ArgWithLane() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) dup := m.allocateInstr() switch lane { @@ -760,12 +760,12 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { xx, yy := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone), m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) tmp, tmp2 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)), operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) - m.insert(m.allocateInstr().asVecRRR(vecOpSmull, tmp, xx, yy, vecArrangement8H)) - m.insert(m.allocateInstr().asVecRRR(vecOpSmull2, tmp2, xx, yy, vecArrangement8H)) - m.insert(m.allocateInstr().asVecRRR(vecOpAddp, tmp, tmp, tmp2, vecArrangement4S)) + m.insert(m.allocateInstr().asVecRRR(vecOpSmull, tmp.nr(), xx, yy, vecArrangement8H)) + m.insert(m.allocateInstr().asVecRRR(vecOpSmull2, tmp2.nr(), xx, yy, vecArrangement8H)) + m.insert(m.allocateInstr().asVecRRR(vecOpAddp, tmp.nr(), tmp, tmp2, vecArrangement4S)) - rd := operandNR(m.compiler.VRegOf(instr.Return())) - m.insert(m.allocateInstr().asFpuMov128(rd.nr(), tmp.nr())) + rd := m.compiler.VRegOf(instr.Return()) + m.insert(m.allocateInstr().asFpuMov128(rd, tmp.nr())) case ssa.OpcodeLoadSplat: ptr, offset, lane := instr.LoadSplatData() @@ -794,7 +794,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { m.executableContext.FlushPendingInstructions() } -func (m *machine) lowerShuffle(rd, rn, rm operand, lane1, lane2 uint64) { +func (m *machine) lowerShuffle(rd regalloc.VReg, rn, rm operand, lane1, lane2 uint64) { // `tbl2` requires 2 consecutive registers, so we arbitrarily pick v29, v30. vReg, wReg := v29VReg, v30VReg @@ -822,7 +822,7 @@ func (m *machine) lowerShuffle(rd, rn, rm operand, lane1, lane2 uint64) { m.insert(tbl2) } -func (m *machine) lowerVShift(op ssa.Opcode, rd, rn, rm operand, arr vecArrangement) { +func (m *machine) lowerVShift(op ssa.Opcode, rd regalloc.VReg, rn, rm operand, arr vecArrangement) { var modulo byte switch arr { case vecArrangement16B: @@ -847,13 +847,13 @@ func (m *machine) lowerVShift(op ssa.Opcode, rd, rn, rm operand, arr vecArrangem if op != ssa.OpcodeVIshl { // Negate the amount to make this as right shift. neg := m.allocateInstr() - neg.asALU(aluOpSub, rtmp, operandNR(xzrVReg), rtmp, true) + neg.asALU(aluOpSub, rtmp.nr(), operandNR(xzrVReg), rtmp, true) m.insert(neg) } // Copy the shift amount into a vector register as sshl/ushl requires it to be there. 
dup := m.allocateInstr() - dup.asVecDup(vtmp, rtmp, arr) + dup.asVecDup(vtmp.nr(), rtmp, arr) m.insert(dup) if op == ssa.OpcodeVIshl || op == ssa.OpcodeVSshr { @@ -867,7 +867,7 @@ func (m *machine) lowerVShift(op ssa.Opcode, rd, rn, rm operand, arr vecArrangem } } -func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangement) { +func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm operand, rd regalloc.VReg, arr vecArrangement) { tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) // Special case VallTrue for i64x2. @@ -878,11 +878,11 @@ func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangem // cset dst, eq ins := m.allocateInstr() - ins.asVecMisc(vecOpCmeq0, tmp, rm, vecArrangement2D) + ins.asVecMisc(vecOpCmeq0, tmp.nr(), rm, vecArrangement2D) m.insert(ins) addp := m.allocateInstr() - addp.asVecRRR(vecOpAddp, tmp, tmp, tmp, vecArrangement2D) + addp.asVecRRR(vecOpAddp, tmp.nr(), tmp, tmp, vecArrangement2D) m.insert(addp) fcmp := m.allocateInstr() @@ -890,7 +890,7 @@ func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangem m.insert(fcmp) cset := m.allocateInstr() - cset.asCSet(rd.nr(), false, eq) + cset.asCSet(rd, false, eq) m.insert(cset) return @@ -900,10 +900,10 @@ func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangem ins := m.allocateInstr() if op == ssa.OpcodeVanyTrue { // umaxp v4?.16b, v2?.16b, v2?.16b - ins.asVecRRR(vecOpUmaxp, tmp, rm, rm, vecArrangement16B) + ins.asVecRRR(vecOpUmaxp, tmp.nr(), rm, rm, vecArrangement16B) } else { // uminv d4?, v2?.4s - ins.asVecLanes(vecOpUminv, tmp, rm, arr) + ins.asVecLanes(vecOpUminv, tmp.nr(), rm, arr) } m.insert(ins) @@ -917,15 +917,15 @@ func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangem m.insert(movv) fc := m.allocateInstr() - fc.asCCmpImm(rd, uint64(0), al, 0, true) + fc.asCCmpImm(operandNR(rd), uint64(0), al, 0, true) m.insert(fc) cset := m.allocateInstr() - cset.asCSet(rd.nr(), false, ne) + cset.asCSet(rd, false, ne) m.insert(cset) } -func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) { +func (m *machine) lowerVhighBits(rm operand, rd regalloc.VReg, arr vecArrangement) { r0 := operandNR(m.compiler.AllocateVReg(ssa.TypeI64)) v0 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) v1 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) @@ -947,7 +947,7 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) { // Right arithmetic shift on the original vector and store the result into v1. So we have: // v1[i] = 0xff if vi<0, 0 otherwise. sshr := m.allocateInstr() - sshr.asVecShiftImm(vecOpSshr, v1, rm, operandShiftImm(7), vecArrangement16B) + sshr.asVecShiftImm(vecOpSshr, v1.nr(), rm, operandShiftImm(7), vecArrangement16B) m.insert(sshr) // Load the bit mask into r0. @@ -958,7 +958,7 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) { // dup r0 to v0. dup := m.allocateInstr() - dup.asVecDup(v0, r0, vecArrangement2D) + dup.asVecDup(v0.nr(), r0, vecArrangement2D) m.insert(dup) // Lane-wise logical AND with the bit mask, meaning that we have @@ -967,23 +967,23 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) { // Below, we use the following notation: // wi := (1 << i) if vi<0, 0 otherwise. 
and := m.allocateInstr() - and.asVecRRR(vecOpAnd, v1, v1, v0, vecArrangement16B) + and.asVecRRR(vecOpAnd, v1.nr(), v1, v0, vecArrangement16B) m.insert(and) // Swap the lower and higher 8 byte elements, and write it into v0, meaning that we have // v0[i] = w(i+8) if i < 8, w(i-8) otherwise. ext := m.allocateInstr() - ext.asVecExtract(v0, v1, v1, vecArrangement16B, uint32(8)) + ext.asVecExtract(v0.nr(), v1, v1, vecArrangement16B, uint32(8)) m.insert(ext) // v = [w0, w8, ..., w7, w15] zip1 := m.allocateInstr() - zip1.asVecPermute(vecOpZip1, v0, v1, v0, vecArrangement16B) + zip1.asVecPermute(vecOpZip1, v0.nr(), v1, v0, vecArrangement16B) m.insert(zip1) // v.h[0] = w0 + ... + w15 addv := m.allocateInstr() - addv.asVecLanes(vecOpAddv, v0, v0, vecArrangement8H) + addv.asVecLanes(vecOpAddv, v0.nr(), v0, vecArrangement8H) m.insert(addv) // Extract the v.h[0] as the result. @@ -1006,7 +1006,7 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) { // Right arithmetic shift on the original vector and store the result into v1. So we have: // v[i] = 0xffff if vi<0, 0 otherwise. sshr := m.allocateInstr() - sshr.asVecShiftImm(vecOpSshr, v1, rm, operandShiftImm(15), vecArrangement8H) + sshr.asVecShiftImm(vecOpSshr, v1.nr(), rm, operandShiftImm(15), vecArrangement8H) m.insert(sshr) // Load the bit mask into r0. @@ -1014,26 +1014,26 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) { // dup r0 to vector v0. dup := m.allocateInstr() - dup.asVecDup(v0, r0, vecArrangement2D) + dup.asVecDup(v0.nr(), r0, vecArrangement2D) m.insert(dup) lsl := m.allocateInstr() - lsl.asALUShift(aluOpLsl, r0, r0, operandShiftImm(4), true) + lsl.asALUShift(aluOpLsl, r0.nr(), r0, operandShiftImm(4), true) m.insert(lsl) movv := m.allocateInstr() - movv.asMovToVec(v0, r0, vecArrangementD, vecIndex(1)) + movv.asMovToVec(v0.nr(), r0, vecArrangementD, vecIndex(1)) m.insert(movv) // Lane-wise logical AND with the bitmask, meaning that we have // v[i] = (1 << i) if vi<0, 0 otherwise for i=0..3 // = (1 << (i+4)) if vi<0, 0 otherwise for i=3..7 and := m.allocateInstr() - and.asVecRRR(vecOpAnd, v0, v1, v0, vecArrangement16B) + and.asVecRRR(vecOpAnd, v0.nr(), v1, v0, vecArrangement16B) m.insert(and) addv := m.allocateInstr() - addv.asVecLanes(vecOpAddv, v0, v0, vecArrangement8H) + addv.asVecLanes(vecOpAddv, v0.nr(), v0, vecArrangement8H) m.insert(addv) movfv := m.allocateInstr() @@ -1055,7 +1055,7 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) { // Right arithmetic shift on the original vector and store the result into v1. So we have: // v[i] = 0xffffffff if vi<0, 0 otherwise. sshr := m.allocateInstr() - sshr.asVecShiftImm(vecOpSshr, v1, rm, operandShiftImm(31), vecArrangement4S) + sshr.asVecShiftImm(vecOpSshr, v1.nr(), rm, operandShiftImm(31), vecArrangement4S) m.insert(sshr) // Load the bit mask into r0. @@ -1063,26 +1063,26 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) { // dup r0 to vector v0. 
dup := m.allocateInstr() - dup.asVecDup(v0, r0, vecArrangement2D) + dup.asVecDup(v0.nr(), r0, vecArrangement2D) m.insert(dup) lsl := m.allocateInstr() - lsl.asALUShift(aluOpLsl, r0, r0, operandShiftImm(2), true) + lsl.asALUShift(aluOpLsl, r0.nr(), r0, operandShiftImm(2), true) m.insert(lsl) movv := m.allocateInstr() - movv.asMovToVec(v0, r0, vecArrangementD, vecIndex(1)) + movv.asMovToVec(v0.nr(), r0, vecArrangementD, vecIndex(1)) m.insert(movv) // Lane-wise logical AND with the bitmask, meaning that we have // v[i] = (1 << i) if vi<0, 0 otherwise for i in [0, 1] // = (1 << (i+4)) if vi<0, 0 otherwise for i in [2, 3] and := m.allocateInstr() - and.asVecRRR(vecOpAnd, v0, v1, v0, vecArrangement16B) + and.asVecRRR(vecOpAnd, v0.nr(), v1, v0, vecArrangement16B) m.insert(and) addv := m.allocateInstr() - addv.asVecLanes(vecOpAddv, v0, v0, vecArrangement4S) + addv.asVecLanes(vecOpAddv, v0.nr(), v0, vecArrangement4S) m.insert(addv) movfv := m.allocateInstr() @@ -1102,21 +1102,21 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) { // Move the higher 64-bit int into r0. movv1 := m.allocateInstr() - movv1.asMovFromVec(r0, rm, vecArrangementD, vecIndex(1), false) + movv1.asMovFromVec(r0.nr(), rm, vecArrangementD, vecIndex(1), false) m.insert(movv1) // Move the sign bit into the least significant bit. lsr1 := m.allocateInstr() - lsr1.asALUShift(aluOpLsr, r0, r0, operandShiftImm(63), true) + lsr1.asALUShift(aluOpLsr, r0.nr(), r0, operandShiftImm(63), true) m.insert(lsr1) lsr2 := m.allocateInstr() - lsr2.asALUShift(aluOpLsr, rd, rd, operandShiftImm(63), true) + lsr2.asALUShift(aluOpLsr, rd, operandNR(rd), operandShiftImm(63), true) m.insert(lsr2) // rd = (r0<<1) | rd lsl := m.allocateInstr() - lsl.asALU(aluOpAdd, rd, rd, operandSR(r0.nr(), 1, shiftOpLSL), false) + lsl.asALU(aluOpAdd, rd, operandNR(rd), operandSR(r0.nr(), 1, shiftOpLSL), false) m.insert(lsl) default: panic("Unsupported " + arr.String()) @@ -1128,7 +1128,7 @@ func (m *machine) lowerVecMisc(op vecOp, instr *ssa.Instruction) { arr := ssaLaneToArrangement(lane) ins := m.allocateInstr() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) ins.asVecMisc(op, rd, rn, arr) m.insert(ins) } @@ -1137,22 +1137,22 @@ func (m *machine) lowerVecRRR(op vecOp, x, y, ret ssa.Value, arr vecArrangement) ins := m.allocateInstr() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(ret)) + rd := m.compiler.VRegOf(ret) ins.asVecRRR(op, rd, rn, rm, arr) m.insert(ins) } -func (m *machine) lowerVIMul(rd, rn, rm operand, arr vecArrangement) { +func (m *machine) lowerVIMul(rd regalloc.VReg, rn, rm operand, arr vecArrangement) { if arr != vecArrangement2D { mul := m.allocateInstr() mul.asVecRRR(vecOpMul, rd, rn, rm, arr) m.insert(mul) } else { - tmp1 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) - tmp2 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) - tmp3 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) + tmp1 := m.compiler.AllocateVReg(ssa.TypeV128) + tmp2 := m.compiler.AllocateVReg(ssa.TypeV128) + tmp3 := m.compiler.AllocateVReg(ssa.TypeV128) - tmpRes := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) + tmpRes := m.compiler.AllocateVReg(ssa.TypeV128) // Following the algorithm in https://chromium-review.googlesource.com/c/v8/v8/+/1781696 rev64 := m.allocateInstr() @@ -1160,7 +1160,7 @@ func (m *machine) 
lowerVIMul(rd, rn, rm operand, arr vecArrangement) { m.insert(rev64) mul := m.allocateInstr() - mul.asVecRRR(vecOpMul, tmp2, tmp2, rn, vecArrangement4S) + mul.asVecRRR(vecOpMul, tmp2, operandNR(tmp2), rn, vecArrangement4S) m.insert(mul) xtn1 := m.allocateInstr() @@ -1168,7 +1168,7 @@ func (m *machine) lowerVIMul(rd, rn, rm operand, arr vecArrangement) { m.insert(xtn1) addp := m.allocateInstr() - addp.asVecRRR(vecOpAddp, tmp2, tmp2, tmp2, vecArrangement4S) + addp.asVecRRR(vecOpAddp, tmp2, operandNR(tmp2), operandNR(tmp2), vecArrangement4S) m.insert(addp) xtn2 := m.allocateInstr() @@ -1179,15 +1179,15 @@ func (m *machine) lowerVIMul(rd, rn, rm operand, arr vecArrangement) { // In short, in UMLAL instruction, the result register is also one of the source register, and // the value on the result register is significant. shll := m.allocateInstr() - shll.asVecMisc(vecOpShll, tmpRes, tmp2, vecArrangement2S) + shll.asVecMisc(vecOpShll, tmpRes, operandNR(tmp2), vecArrangement2S) m.insert(shll) umlal := m.allocateInstr() - umlal.asVecRRRRewrite(vecOpUmlal, tmpRes, tmp3, tmp1, vecArrangement2S) + umlal.asVecRRRRewrite(vecOpUmlal, tmpRes, operandNR(tmp3), operandNR(tmp1), vecArrangement2S) m.insert(umlal) mov := m.allocateInstr() - mov.asFpuMov128(rd.nr(), tmpRes.nr()) + mov.asFpuMov128(rd, tmpRes) m.insert(mov) } } @@ -1203,7 +1203,7 @@ func (m *machine) lowerVMinMaxPseudo(instr *ssa.Instruction, max bool) { // BSL modifies the destination register, so we need to use a temporary register so that // the actual definition of the destination register happens *after* the BSL instruction. // That way, we can force the spill instruction to be inserted after the BSL instruction. - tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) + tmp := m.compiler.AllocateVReg(ssa.TypeV128) fcmgt := m.allocateInstr() if max { @@ -1220,17 +1220,17 @@ func (m *machine) lowerVMinMaxPseudo(instr *ssa.Instruction, max bool) { res := operandNR(m.compiler.VRegOf(instr.Return())) mov2 := m.allocateInstr() - mov2.asFpuMov128(res.nr(), tmp.nr()) + mov2.asFpuMov128(res.nr(), tmp) m.insert(mov2) } -func (m *machine) lowerIRem(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bit, signed bool) { +func (m *machine) lowerIRem(execCtxVReg regalloc.VReg, rd, rn regalloc.VReg, rm operand, _64bit, signed bool) { div := m.allocateInstr() if signed { - div.asALU(aluOpSDiv, rd, rn, rm, _64bit) + div.asALU(aluOpSDiv, rd, operandNR(rn), rm, _64bit) } else { - div.asALU(aluOpUDiv, rd, rn, rm, _64bit) + div.asALU(aluOpUDiv, rd, operandNR(rn), rm, _64bit) } m.insert(div) @@ -1239,11 +1239,11 @@ func (m *machine) lowerIRem(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bi // rd = rn-rd*rm by MSUB instruction. msub := m.allocateInstr() - msub.asALURRRR(aluOpMSub, rd, rd, rm, rn, _64bit) + msub.asALURRRR(aluOpMSub, rd, operandNR(rd), rm, rn, _64bit) m.insert(msub) } -func (m *machine) lowerIDiv(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bit, signed bool) { +func (m *machine) lowerIDiv(execCtxVReg, rd regalloc.VReg, rn, rm operand, _64bit, signed bool) { div := m.allocateInstr() if signed { @@ -1260,7 +1260,7 @@ func (m *machine) lowerIDiv(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bi // We need to check the signed overflow which happens iff "math.MinInt{32,64} / -1" minusOneCheck := m.allocateInstr() // Sets eq condition if rm == -1. 
- minusOneCheck.asALU(aluOpAddS, operandNR(xzrVReg), rm, operandImm12(1, 0), _64bit) + minusOneCheck.asALU(aluOpAddS, xzrVReg, rm, operandImm12(1, 0), _64bit) m.insert(minusOneCheck) ccmp := m.allocateInstr() @@ -1290,20 +1290,20 @@ func (m *machine) exitIfNot(execCtxVReg regalloc.VReg, c cond, cond64bit bool, c func (m *machine) lowerFcopysign(x, y, ret ssa.Value) { rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - var tmpI, tmpF operand + var tmpI, tmpF regalloc.VReg _64 := x.Type() == ssa.TypeF64 if _64 { - tmpF = operandNR(m.compiler.AllocateVReg(ssa.TypeF64)) - tmpI = operandNR(m.compiler.AllocateVReg(ssa.TypeI64)) + tmpF = m.compiler.AllocateVReg(ssa.TypeF64) + tmpI = m.compiler.AllocateVReg(ssa.TypeI64) } else { - tmpF = operandNR(m.compiler.AllocateVReg(ssa.TypeF32)) - tmpI = operandNR(m.compiler.AllocateVReg(ssa.TypeI32)) + tmpF = m.compiler.AllocateVReg(ssa.TypeF32) + tmpI = m.compiler.AllocateVReg(ssa.TypeI32) } rd := m.compiler.VRegOf(ret) - m.lowerFcopysignImpl(operandNR(rd), rn, rm, tmpI, tmpF, _64) + m.lowerFcopysignImpl(rd, rn, rm, tmpI, tmpF, _64) } -func (m *machine) lowerFcopysignImpl(rd, rn, rm, tmpI, tmpF operand, _64bit bool) { +func (m *machine) lowerFcopysignImpl(rd regalloc.VReg, rn, rm operand, tmpI, tmpF regalloc.VReg, _64bit bool) { // This is exactly the same code emitted by GCC for "__builtin_copysign": // // mov x0, -9223372036854775808 @@ -1313,26 +1313,26 @@ func (m *machine) lowerFcopysignImpl(rd, rn, rm, tmpI, tmpF operand, _64bit bool setMSB := m.allocateInstr() if _64bit { - m.lowerConstantI64(tmpI.nr(), math.MinInt64) - setMSB.asMovToVec(tmpF, tmpI, vecArrangementD, vecIndex(0)) + m.lowerConstantI64(tmpI, math.MinInt64) + setMSB.asMovToVec(tmpF, operandNR(tmpI), vecArrangementD, vecIndex(0)) } else { - m.lowerConstantI32(tmpI.nr(), math.MinInt32) - setMSB.asMovToVec(tmpF, tmpI, vecArrangementS, vecIndex(0)) + m.lowerConstantI32(tmpI, math.MinInt32) + setMSB.asMovToVec(tmpF, operandNR(tmpI), vecArrangementS, vecIndex(0)) } m.insert(setMSB) - tmpReg := operandNR(m.compiler.AllocateVReg(ssa.TypeF64)) + tmpReg := m.compiler.AllocateVReg(ssa.TypeF64) mov := m.allocateInstr() - mov.asFpuMov64(tmpReg.nr(), rn.nr()) + mov.asFpuMov64(tmpReg, rn.nr()) m.insert(mov) vbit := m.allocateInstr() - vbit.asVecRRRRewrite(vecOpBit, tmpReg, rm, tmpF, vecArrangement8B) + vbit.asVecRRRRewrite(vecOpBit, tmpReg, rm, operandNR(tmpF), vecArrangement8B) m.insert(vbit) movDst := m.allocateInstr() - movDst.asFpuMov64(rd.nr(), tmpReg.nr()) + movDst.asFpuMov64(rd, tmpReg) m.insert(movDst) } @@ -1340,7 +1340,7 @@ func (m *machine) lowerBitcast(instr *ssa.Instruction) { v, dstType := instr.BitcastData() srcType := v.Type() rn := m.getOperand_NR(m.compiler.ValueDefinition(v), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) srcInt := srcType.IsInt() dstInt := dstType.IsInt() switch { @@ -1371,14 +1371,14 @@ func (m *machine) lowerBitcast(instr *ssa.Instruction) { func (m *machine) lowerFpuUniOp(op fpuUniOp, in, out ssa.Value) { rn := m.getOperand_NR(m.compiler.ValueDefinition(in), extModeNone) - rd := operandNR(m.compiler.VRegOf(out)) + rd := m.compiler.VRegOf(out) neg := m.allocateInstr() neg.asFpuRR(op, rd, rn, in.Type().Bits() == 64) m.insert(neg) } -func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64bit, dst64bit, nonTrapping bool) { +func (m *machine) lowerFpuToInt(rd regalloc.VReg, rn operand, ctx 
regalloc.VReg, signed, src64bit, dst64bit, nonTrapping bool) { if !nonTrapping { // First of all, we have to clear the FPU flags. flagClear := m.allocateInstr() @@ -1405,7 +1405,7 @@ func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64 // Check if the conversion was undefined by comparing the status with 1. // See https://developer.arm.com/documentation/ddi0595/2020-12/AArch64-Registers/FPSR--Floating-point-Status-Register alu := m.allocateInstr() - alu.asALU(aluOpSubS, operandNR(xzrVReg), operandNR(tmpReg), operandImm12(1, 0), true) + alu.asALU(aluOpSubS, xzrVReg, operandNR(tmpReg), operandImm12(1, 0), true) m.insert(alu) // If it is not undefined, we can return the result. @@ -1429,7 +1429,7 @@ func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64 } } -func (m *machine) lowerIntToFpu(rd, rn operand, signed, src64bit, dst64bit bool) { +func (m *machine) lowerIntToFpu(rd regalloc.VReg, rn operand, signed, src64bit, dst64bit bool) { cvt := m.allocateInstr() cvt.asIntToFpu(rd, rn, signed, src64bit, dst64bit) m.insert(cvt) @@ -1456,7 +1456,7 @@ func (m *machine) lowerFpuBinOp(si *ssa.Instruction) { xDef, yDef := m.compiler.ValueDefinition(x), m.compiler.ValueDefinition(y) rn := m.getOperand_NR(xDef, extModeNone) rm := m.getOperand_NR(yDef, extModeNone) - rd := operandNR(m.compiler.VRegOf(si.Return())) + rd := m.compiler.VRegOf(si.Return()) instr.asFpuRRR(op, rd, rn, rm, x.Type().Bits() == 64) m.insert(instr) } @@ -1482,7 +1482,7 @@ func (m *machine) lowerSubOrAdd(si *ssa.Instruction, add bool) { case !add && yNegated: // rn+rm = x-(-y) = x-y aop = aluOpAdd } - rd := operandNR(m.compiler.VRegOf(si.Return())) + rd := m.compiler.VRegOf(si.Return()) alu := m.allocateInstr() alu.asALU(aop, rd, rn, rm, x.Type().Bits() == 64) m.insert(alu) @@ -1527,7 +1527,7 @@ func (m *machine) lowerIcmp(si *ssa.Instruction) { rn := m.getOperand_NR(m.compiler.ValueDefinition(x), ext) rm := m.getOperand_Imm12_ER_SR_NR(m.compiler.ValueDefinition(y), ext) alu := m.allocateInstr() - alu.asALU(aluOpSubS, operandNR(xzrVReg), rn, rm, in64bit) + alu.asALU(aluOpSubS, xzrVReg, rn, rm, in64bit) m.insert(alu) cset := m.allocateInstr() @@ -1542,7 +1542,7 @@ func (m *machine) lowerVIcmp(si *ssa.Instruction) { rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(si.Return())) + rd := m.compiler.VRegOf(si.Return()) switch flag { case eq: @@ -1554,7 +1554,7 @@ func (m *machine) lowerVIcmp(si *ssa.Instruction) { cmp.asVecRRR(vecOpCmeq, rd, rn, rm, arr) m.insert(cmp) not := m.allocateInstr() - not.asVecMisc(vecOpNot, rd, rd, vecArrangement16B) + not.asVecMisc(vecOpNot, rd, operandNR(rd), vecArrangement16B) m.insert(not) case ge: cmp := m.allocateInstr() @@ -1598,7 +1598,7 @@ func (m *machine) lowerVFcmp(si *ssa.Instruction) { rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(si.Return())) + rd := m.compiler.VRegOf(si.Return()) switch flag { case eq: @@ -1610,7 +1610,7 @@ func (m *machine) lowerVFcmp(si *ssa.Instruction) { cmp.asVecRRR(vecOpFcmeq, rd, rn, rm, arr) m.insert(cmp) not := m.allocateInstr() - not.asVecMisc(vecOpNot, rd, rd, vecArrangement16B) + not.asVecMisc(vecOpNot, rd, operandNR(rd), vecArrangement16B) m.insert(not) case ge: cmp := m.allocateInstr() @@ -1631,7 +1631,7 @@ func (m *machine) lowerVFcmp(si *ssa.Instruction) { } } -func (m 
*machine) lowerVfpuToInt(rd, rn operand, arr vecArrangement, signed bool) { +func (m *machine) lowerVfpuToInt(rd regalloc.VReg, rn operand, arr vecArrangement, signed bool) { cvt := m.allocateInstr() if signed { cvt.asVecMisc(vecOpFcvtzs, rd, rn, arr) @@ -1643,15 +1643,15 @@ func (m *machine) lowerVfpuToInt(rd, rn operand, arr vecArrangement, signed bool if arr == vecArrangement2D { narrow := m.allocateInstr() if signed { - narrow.asVecMisc(vecOpSqxtn, rd, rd, vecArrangement2S) + narrow.asVecMisc(vecOpSqxtn, rd, operandNR(rd), vecArrangement2S) } else { - narrow.asVecMisc(vecOpUqxtn, rd, rd, vecArrangement2S) + narrow.asVecMisc(vecOpUqxtn, rd, operandNR(rd), vecArrangement2S) } m.insert(narrow) } } -func (m *machine) lowerVfpuFromInt(rd, rn operand, arr vecArrangement, signed bool) { +func (m *machine) lowerVfpuFromInt(rd regalloc.VReg, rn operand, arr vecArrangement, signed bool) { cvt := m.allocateInstr() if signed { cvt.asVecMisc(vecOpScvtf, rd, rn, arr) @@ -1665,7 +1665,7 @@ func (m *machine) lowerShifts(si *ssa.Instruction, ext extMode, aluOp aluOp) { x, amount := si.Arg2() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), ext) rm := m.getOperand_ShiftImm_NR(m.compiler.ValueDefinition(amount), ext, x.Type().Bits()) - rd := operandNR(m.compiler.VRegOf(si.Return())) + rd := m.compiler.VRegOf(si.Return()) alu := m.allocateInstr() alu.asALUShift(aluOp, rd, rn, rm, x.Type().Bits() == 64) @@ -1678,11 +1678,11 @@ func (m *machine) lowerBitwiseAluOp(si *ssa.Instruction, op aluOp, ignoreResult xDef, yDef := m.compiler.ValueDefinition(x), m.compiler.ValueDefinition(y) rn := m.getOperand_NR(xDef, extModeNone) - var rd operand + var rd regalloc.VReg if ignoreResult { - rd = operandNR(xzrVReg) + rd = xzrVReg } else { - rd = operandNR(m.compiler.VRegOf(si.Return())) + rd = m.compiler.VRegOf(si.Return()) } _64 := x.Type().Bits() == 64 @@ -1691,7 +1691,7 @@ func (m *machine) lowerBitwiseAluOp(si *ssa.Instruction, op aluOp, ignoreResult c := instr.ConstantVal() if isBitMaskImmediate(c, _64) { // Constant bit wise operations can be lowered to a single instruction. - alu.asALUBitmaskImm(op, rd.nr(), rn.nr(), c, _64) + alu.asALUBitmaskImm(op, rd, rn.nr(), c, _64) m.insert(alu) return } @@ -1709,25 +1709,25 @@ func (m *machine) lowerRotl(si *ssa.Instruction) { rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - var tmp operand + var tmp regalloc.VReg if _64 { - tmp = operandNR(m.compiler.AllocateVReg(ssa.TypeI64)) + tmp = m.compiler.AllocateVReg(ssa.TypeI64) } else { - tmp = operandNR(m.compiler.AllocateVReg(ssa.TypeI32)) + tmp = m.compiler.AllocateVReg(ssa.TypeI32) } - rd := operandNR(m.compiler.VRegOf(r)) + rd := m.compiler.VRegOf(r) // Encode rotl as neg + rotr: neg is a sub against the zero-reg. m.lowerRotlImpl(rd, rn, rm, tmp, _64) } -func (m *machine) lowerRotlImpl(rd, rn, rm, tmp operand, is64bit bool) { +func (m *machine) lowerRotlImpl(rd regalloc.VReg, rn, rm operand, tmp regalloc.VReg, is64bit bool) { // Encode rotl as neg + rotr: neg is a sub against the zero-reg. 
neg := m.allocateInstr() neg.asALU(aluOpSub, tmp, operandNR(xzrVReg), rm, is64bit) m.insert(neg) alu := m.allocateInstr() - alu.asALU(aluOpRotR, rd, rn, tmp, is64bit) + alu.asALU(aluOpRotR, rd, rn, operandNR(tmp), is64bit) m.insert(alu) } @@ -1737,7 +1737,7 @@ func (m *machine) lowerRotr(si *ssa.Instruction) { xDef, yDef := m.compiler.ValueDefinition(x), m.compiler.ValueDefinition(y) rn := m.getOperand_NR(xDef, extModeNone) rm := m.getOperand_NR(yDef, extModeNone) - rd := operandNR(m.compiler.VRegOf(si.Return())) + rd := m.compiler.VRegOf(si.Return()) alu := m.allocateInstr() alu.asALU(aluOpRotR, rd, rn, rm, si.Return().Type().Bits() == 64) @@ -1797,7 +1797,7 @@ func (m *machine) lowerImul(x, y, result ssa.Value) { // TODO: if this comes before Add/Sub, we could merge it by putting it into the place of xzrVReg. mul := m.allocateInstr() - mul.asALURRRR(aluOpMAdd, operandNR(rd), rn, rm, operandNR(xzrVReg), x.Type().Bits() == 64) + mul.asALURRRR(aluOpMAdd, rd, rn, rm, xzrVReg, x.Type().Bits() == 64) m.insert(mul) } @@ -1849,22 +1849,22 @@ func (m *machine) lowerPopcnt(x, result ssa.Value) { // mov x5, v0.d[0] ;; finally we mov the result back to a GPR // - rd := operandNR(m.compiler.VRegOf(result)) + rd := m.compiler.VRegOf(result) rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rf1 := operandNR(m.compiler.AllocateVReg(ssa.TypeF64)) ins := m.allocateInstr() - ins.asMovToVec(rf1, rn, vecArrangementD, vecIndex(0)) + ins.asMovToVec(rf1.nr(), rn, vecArrangementD, vecIndex(0)) m.insert(ins) rf2 := operandNR(m.compiler.AllocateVReg(ssa.TypeF64)) cnt := m.allocateInstr() - cnt.asVecMisc(vecOpCnt, rf2, rf1, vecArrangement16B) + cnt.asVecMisc(vecOpCnt, rf2.nr(), rf1, vecArrangement16B) m.insert(cnt) rf3 := operandNR(m.compiler.AllocateVReg(ssa.TypeF64)) uaddlv := m.allocateInstr() - uaddlv.asVecLanes(vecOpUaddlv, rf3, rf2, vecArrangement8B) + uaddlv.asVecLanes(vecOpUaddlv, rf3.nr(), rf2, vecArrangement8B) m.insert(uaddlv) mov := m.allocateInstr() @@ -1879,32 +1879,35 @@ func (m *machine) lowerExitWithCode(execCtxVReg regalloc.VReg, code wazevoapi.Ex loadExitCodeConst.asMOVZ(tmpReg1, uint64(code), 0, true) setExitCode := m.allocateInstr() - setExitCode.asStore(operandNR(tmpReg1), - addressMode{ - kind: addressModeKindRegUnsignedImm12, - rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64(), - }, 32) + mode := m.amodePool.Allocate() + *mode = addressMode{ + kind: addressModeKindRegUnsignedImm12, + rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64(), + } + setExitCode.asStore(operandNR(tmpReg1), mode, 32) // In order to unwind the stack, we also need to push the current stack pointer: tmp2 := m.compiler.AllocateVReg(ssa.TypeI64) movSpToTmp := m.allocateInstr() movSpToTmp.asMove64(tmp2, spVReg) strSpToExecCtx := m.allocateInstr() - strSpToExecCtx.asStore(operandNR(tmp2), - addressMode{ - kind: addressModeKindRegUnsignedImm12, - rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(), - }, 64) + mode2 := m.amodePool.Allocate() + *mode2 = addressMode{ + kind: addressModeKindRegUnsignedImm12, + rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(), + } + strSpToExecCtx.asStore(operandNR(tmp2), mode2, 64) // Also the address of this exit. 
tmp3 := m.compiler.AllocateVReg(ssa.TypeI64) currentAddrToTmp := m.allocateInstr() currentAddrToTmp.asAdr(tmp3, 0) storeCurrentAddrToExecCtx := m.allocateInstr() - storeCurrentAddrToExecCtx.asStore(operandNR(tmp3), - addressMode{ - kind: addressModeKindRegUnsignedImm12, - rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(), - }, 64) + mode3 := m.amodePool.Allocate() + *mode3 = addressMode{ + kind: addressModeKindRegUnsignedImm12, + rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(), + } + storeCurrentAddrToExecCtx.asStore(operandNR(tmp3), mode3, 64) exitSeq := m.allocateInstr() exitSeq.asExitSequence(execCtxVReg) @@ -1937,7 +1940,7 @@ func (m *machine) lowerIcmpToFlag(x, y ssa.Value, signed bool) { alu.asALU( aluOpSubS, // We don't need the result, just need to set flags. - operandNR(xzrVReg), + xzrVReg, rn, rm, x.Type().Bits() == 64, @@ -2012,7 +2015,7 @@ func (m *machine) lowerSelect(c, x, y, result ssa.Value) { alu.asALU( aluOpSubS, // We don't need the result, just need to set flags. - operandNR(xzrVReg), + xzrVReg, rn, operandNR(xzrVReg), c.Type().Bits() == 64, @@ -2024,7 +2027,7 @@ func (m *machine) lowerSelect(c, x, y, result ssa.Value) { rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(result)) + rd := m.compiler.VRegOf(result) switch x.Type() { case ssa.TypeI32, ssa.TypeI64: // csel rd, rn, rm, cc @@ -2041,10 +2044,10 @@ func (m *machine) lowerSelect(c, x, y, result ssa.Value) { } } -func (m *machine) lowerSelectVec(rc, rn, rm, rd operand) { +func (m *machine) lowerSelectVec(rc, rn, rm operand, rd regalloc.VReg) { // First check if `rc` is zero or not. checkZero := m.allocateInstr() - checkZero.asALU(aluOpSubS, operandNR(xzrVReg), rc, operandNR(xzrVReg), false) + checkZero.asALU(aluOpSubS, xzrVReg, rc, operandNR(xzrVReg), false) m.insert(checkZero) // Then use CSETM to set all bits to one if `rc` is zero. @@ -2054,7 +2057,7 @@ func (m *machine) lowerSelectVec(rc, rn, rm, rd operand) { m.insert(cset) // Then move the bits to the result vector register. - tmp2 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) + tmp2 := m.compiler.AllocateVReg(ssa.TypeV128) dup := m.allocateInstr() dup.asVecDup(tmp2, operandNR(allOnesOrZero), vecArrangement2D) m.insert(dup) @@ -2067,7 +2070,7 @@ func (m *machine) lowerSelectVec(rc, rn, rm, rd operand) { // Finally, move the result to the destination register. 
mov2 := m.allocateInstr() - mov2.asFpuMov128(rd.nr(), tmp2.nr()) + mov2.asFpuMov128(rd, tmp2) m.insert(mov2) } @@ -2099,28 +2102,28 @@ func (m *machine) lowerAtomicRmw(si *ssa.Instruction) { addr, val := si.Arg2() addrDef, valDef := m.compiler.ValueDefinition(addr), m.compiler.ValueDefinition(val) rn := m.getOperand_NR(addrDef, extModeNone) - rt := operandNR(m.compiler.VRegOf(si.Return())) + rt := m.compiler.VRegOf(si.Return()) rs := m.getOperand_NR(valDef, extModeNone) _64 := si.Return().Type().Bits() == 64 - var tmp operand + var tmp regalloc.VReg if _64 { - tmp = operandNR(m.compiler.AllocateVReg(ssa.TypeI64)) + tmp = m.compiler.AllocateVReg(ssa.TypeI64) } else { - tmp = operandNR(m.compiler.AllocateVReg(ssa.TypeI32)) + tmp = m.compiler.AllocateVReg(ssa.TypeI32) } - m.lowerAtomicRmwImpl(op, rn, rs, rt, tmp, size, negateArg, flipArg, _64) + m.lowerAtomicRmwImpl(op, rn.nr(), rs.nr(), rt, tmp, size, negateArg, flipArg, _64) } -func (m *machine) lowerAtomicRmwImpl(op atomicRmwOp, rn, rs, rt, tmp operand, size uint64, negateArg, flipArg, dst64bit bool) { +func (m *machine) lowerAtomicRmwImpl(op atomicRmwOp, rn, rs, rt, tmp regalloc.VReg, size uint64, negateArg, flipArg, dst64bit bool) { switch { case negateArg: neg := m.allocateInstr() - neg.asALU(aluOpSub, tmp, operandNR(xzrVReg), rs, dst64bit) + neg.asALU(aluOpSub, tmp, operandNR(xzrVReg), operandNR(rs), dst64bit) m.insert(neg) case flipArg: flip := m.allocateInstr() - flip.asALU(aluOpOrn, tmp, operandNR(xzrVReg), rs, dst64bit) + flip.asALU(aluOpOrn, tmp, operandNR(xzrVReg), operandNR(rs), dst64bit) m.insert(flip) default: tmp = rs @@ -2139,32 +2142,32 @@ func (m *machine) lowerAtomicCas(si *ssa.Instruction) { rn := m.getOperand_NR(addrDef, extModeNone) rt := m.getOperand_NR(replDef, extModeNone) rs := m.getOperand_NR(expDef, extModeNone) - tmp := operandNR(m.compiler.AllocateVReg(si.Return().Type())) + tmp := m.compiler.AllocateVReg(si.Return().Type()) _64 := si.Return().Type().Bits() == 64 // rs is overwritten by CAS, so we need to move it to the result register before the instruction // in case when it is used somewhere else. 
mov := m.allocateInstr() if _64 { - mov.asMove64(tmp.nr(), rs.nr()) + mov.asMove64(tmp, rs.nr()) } else { - mov.asMove32(tmp.nr(), rs.nr()) + mov.asMove32(tmp, rs.nr()) } m.insert(mov) - m.lowerAtomicCasImpl(rn, tmp, rt, size) + m.lowerAtomicCasImpl(rn.nr(), tmp, rt.nr(), size) mov2 := m.allocateInstr() rd := m.compiler.VRegOf(si.Return()) if _64 { - mov2.asMove64(rd, tmp.nr()) + mov2.asMove64(rd, tmp) } else { - mov2.asMove32(rd, tmp.nr()) + mov2.asMove32(rd, tmp) } m.insert(mov2) } -func (m *machine) lowerAtomicCasImpl(rn, rs, rt operand, size uint64) { +func (m *machine) lowerAtomicCasImpl(rn, rs, rt regalloc.VReg, size uint64) { cas := m.allocateInstr() cas.asAtomicCas(rn, rs, rt, size) m.insert(cas) @@ -2176,12 +2179,12 @@ func (m *machine) lowerAtomicLoad(si *ssa.Instruction) { addrDef := m.compiler.ValueDefinition(addr) rn := m.getOperand_NR(addrDef, extModeNone) - rt := operandNR(m.compiler.VRegOf(si.Return())) + rt := m.compiler.VRegOf(si.Return()) - m.lowerAtomicLoadImpl(rn, rt, size) + m.lowerAtomicLoadImpl(rn.nr(), rt, size) } -func (m *machine) lowerAtomicLoadImpl(rn, rt operand, size uint64) { +func (m *machine) lowerAtomicLoadImpl(rn, rt regalloc.VReg, size uint64) { ld := m.allocateInstr() ld.asAtomicLoad(rn, rt, size) m.insert(ld) diff --git a/internal/engine/wazevo/backend/isa/arm64/lower_instr_test.go b/internal/engine/wazevo/backend/isa/arm64/lower_instr_test.go index e87e27606f..6a367944e4 100644 --- a/internal/engine/wazevo/backend/isa/arm64/lower_instr_test.go +++ b/internal/engine/wazevo/backend/isa/arm64/lower_instr_test.go @@ -380,7 +380,7 @@ L2: regalloc.VReg(3).SetRegType(regalloc.RegTypeInt) mc, _, m := newSetupWithMockContext() mc.typeOf = map[regalloc.VRegID]ssa.Type{execCtx.ID(): ssa.TypeI64, 2: ssa.TypeI64, 3: ssa.TypeI64} - m.lowerIDiv(execCtx, operandNR(rd), operandNR(rn), operandNR(rm), tc._64bit, tc.signed) + m.lowerIDiv(execCtx, rd, operandNR(rn), operandNR(rm), tc._64bit, tc.signed) require.Equal(t, tc.exp, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n") }) } @@ -451,7 +451,7 @@ fcvtzu w1, s2 t.Run(tc.name, func(t *testing.T) { mc, _, m := newSetupWithMockContext() mc.typeOf = map[regalloc.VRegID]ssa.Type{v2VReg.ID(): ssa.TypeI64, x15VReg.ID(): ssa.TypeI64} - m.lowerFpuToInt(operandNR(x1VReg), operandNR(v2VReg), x15VReg, false, false, false, tc.nontrapping) + m.lowerFpuToInt(x1VReg, operandNR(v2VReg), x15VReg, false, false, false, tc.nontrapping) require.Equal(t, tc.expectedAsm, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n") m.executableContext.FlushPendingInstructions() @@ -533,7 +533,7 @@ mul x1.4s, x2.4s, x15.4s } { t.Run(tc.name, func(t *testing.T) { _, _, m := newSetupWithMockContext() - m.lowerVIMul(operandNR(x1VReg), operandNR(x2VReg), operandNR(x15VReg), tc.arrangement) + m.lowerVIMul(x1VReg, operandNR(x2VReg), operandNR(x15VReg), tc.arrangement) require.Equal(t, tc.expectedAsm, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n") m.executableContext.FlushPendingInstructions() @@ -638,7 +638,7 @@ cset x15, ne } { t.Run(tc.name, func(t *testing.T) { _, _, m := newSetupWithMockContext() - m.lowerVcheckTrue(tc.op, operandNR(x1VReg), operandNR(x15VReg), tc.arrangement) + m.lowerVcheckTrue(tc.op, operandNR(x1VReg), x15VReg, tc.arrangement) require.Equal(t, tc.expectedAsm, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n") m.executableContext.FlushPendingInstructions() @@ -723,7 +723,7 @@ add w15, w15, w1?, lsl #1 } { t.Run(tc.name, func(t *testing.T) { _, _, m := newSetupWithMockContext() - m.lowerVhighBits(operandNR(x1VReg), 
operandNR(x15VReg), tc.arrangement) + m.lowerVhighBits(operandNR(x1VReg), x15VReg, tc.arrangement) require.Equal(t, tc.expectedAsm, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n") m.executableContext.FlushPendingInstructions() @@ -772,7 +772,7 @@ tbl x1.16b, { v29.16b, v30.16b }, v1?.16b lane1 := lanes[7]<<56 | lanes[6]<<48 | lanes[5]<<40 | lanes[4]<<32 | lanes[3]<<24 | lanes[2]<<16 | lanes[1]<<8 | lanes[0] lane2 := lanes[15]<<56 | lanes[14]<<48 | lanes[13]<<40 | lanes[12]<<32 | lanes[11]<<24 | lanes[10]<<16 | lanes[9]<<8 | lanes[8] - m.lowerShuffle(operandNR(x1VReg), operandNR(x2VReg), operandNR(x15VReg), lane1, lane2) + m.lowerShuffle(x1VReg, operandNR(x2VReg), operandNR(x15VReg), lane1, lane2) require.Equal(t, tc.expectedAsm, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n") m.executableContext.FlushPendingInstructions() @@ -829,7 +829,7 @@ ushl x1.16b, x2.16b, v2?.16b } { t.Run(tc.name, func(t *testing.T) { _, _, m := newSetupWithMockContext() - m.lowerVShift(tc.op, operandNR(x1VReg), operandNR(x2VReg), operandNR(x15VReg), tc.arrangement) + m.lowerVShift(tc.op, x1VReg, operandNR(x2VReg), operandNR(x15VReg), tc.arrangement) require.Equal(t, tc.expectedAsm, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n") m.executableContext.FlushPendingInstructions() @@ -845,12 +845,12 @@ func TestMachine_lowerSelectVec(t *testing.T) { c := operandNR(m.compiler.AllocateVReg(ssa.TypeI32)) rn := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) rm := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) - rd := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) + rd := m.compiler.AllocateVReg(ssa.TypeV128) require.Equal(t, 1, int(c.reg().ID())) require.Equal(t, 2, int(rn.reg().ID())) require.Equal(t, 3, int(rm.reg().ID())) - require.Equal(t, 4, int(rd.reg().ID())) + require.Equal(t, 4, int(rd.ID())) m.lowerSelectVec(c, rn, rm, rd) require.Equal(t, ` @@ -898,17 +898,17 @@ mov v5?.8b, v6?.8b typ = ssa.TypeI32 ftyp = ssa.TypeF32 } - tmpI := operandNR(m.compiler.AllocateVReg(typ)) - tmpF := operandNR(m.compiler.AllocateVReg(ftyp)) + tmpI := m.compiler.AllocateVReg(typ) + tmpF := m.compiler.AllocateVReg(ftyp) rn := operandNR(m.compiler.AllocateVReg(ftyp)) rm := operandNR(m.compiler.AllocateVReg(ftyp)) - rd := operandNR(m.compiler.AllocateVReg(ftyp)) + rd := m.compiler.AllocateVReg(ftyp) - require.Equal(t, 1, int(tmpI.reg().ID())) - require.Equal(t, 2, int(tmpF.reg().ID())) + require.Equal(t, 1, int(tmpI.ID())) + require.Equal(t, 2, int(tmpF.ID())) require.Equal(t, 3, int(rn.reg().ID())) require.Equal(t, 4, int(rm.reg().ID())) - require.Equal(t, 5, int(rd.reg().ID())) + require.Equal(t, 5, int(rd.ID())) m.lowerFcopysignImpl(rd, rn, rm, tmpI, tmpF, tc._64bit) require.Equal(t, tc.exp, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n") @@ -944,15 +944,15 @@ ror x4?, x2?, x1? } else { typ = ssa.TypeI32 } - tmpI := operandNR(m.compiler.AllocateVReg(typ)) + tmpI := m.compiler.AllocateVReg(typ) rn := operandNR(m.compiler.AllocateVReg(typ)) rm := operandNR(m.compiler.AllocateVReg(typ)) - rd := operandNR(m.compiler.AllocateVReg(typ)) + rd := m.compiler.AllocateVReg(typ) - require.Equal(t, 1, int(tmpI.reg().ID())) + require.Equal(t, 1, int(tmpI.ID())) require.Equal(t, 2, int(rn.reg().ID())) require.Equal(t, 3, int(rm.reg().ID())) - require.Equal(t, 4, int(rd.reg().ID())) + require.Equal(t, 4, int(rd.ID())) m.lowerRotlImpl(rd, rn, rm, tmpI, tc._64bit) require.Equal(t, tc.exp, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n") @@ -1370,15 +1370,15 @@ swpalb w3?, w4?, x2? 
} else { typ = ssa.TypeI32 } - tmp := operandNR(m.compiler.AllocateVReg(typ)) - rn := operandNR(m.compiler.AllocateVReg(ssa.TypeI64)) - rs := operandNR(m.compiler.AllocateVReg(typ)) - rt := operandNR(m.compiler.AllocateVReg(typ)) + tmp := m.compiler.AllocateVReg(typ) + rn := m.compiler.AllocateVReg(ssa.TypeI64) + rs := m.compiler.AllocateVReg(typ) + rt := m.compiler.AllocateVReg(typ) - require.Equal(t, 1, int(tmp.reg().ID())) - require.Equal(t, 2, int(rn.reg().ID())) - require.Equal(t, 3, int(rs.reg().ID())) - require.Equal(t, 4, int(rt.reg().ID())) + require.Equal(t, 1, int(tmp.ID())) + require.Equal(t, 2, int(rn.ID())) + require.Equal(t, 3, int(rs.ID())) + require.Equal(t, 4, int(rt.ID())) m.lowerAtomicRmwImpl(tc.op, rn, rs, rt, tmp, tc.size, tc.negateArg, tc.flipArg, tc._64bit) require.Equal(t, tc.exp, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n") @@ -1458,13 +1458,13 @@ casalb w2?, w3?, x1? } else { typ = ssa.TypeI32 } - rn := operandNR(m.compiler.AllocateVReg(ssa.TypeI64)) - rs := operandNR(m.compiler.AllocateVReg(typ)) - rt := operandNR(m.compiler.AllocateVReg(typ)) + rn := m.compiler.AllocateVReg(ssa.TypeI64) + rs := m.compiler.AllocateVReg(typ) + rt := m.compiler.AllocateVReg(typ) - require.Equal(t, 1, int(rn.reg().ID())) - require.Equal(t, 2, int(rs.reg().ID())) - require.Equal(t, 3, int(rt.reg().ID())) + require.Equal(t, 1, int(rn.ID())) + require.Equal(t, 2, int(rs.ID())) + require.Equal(t, 3, int(rt.ID())) m.lowerAtomicCasImpl(rn, rs, rt, tc.size) require.Equal(t, tc.exp, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n") @@ -1544,11 +1544,11 @@ ldarb w2?, x1? } else { typ = ssa.TypeI32 } - rn := operandNR(m.compiler.AllocateVReg(ssa.TypeI64)) - rt := operandNR(m.compiler.AllocateVReg(typ)) + rn := m.compiler.AllocateVReg(ssa.TypeI64) + rt := m.compiler.AllocateVReg(typ) - require.Equal(t, 1, int(rn.reg().ID())) - require.Equal(t, 2, int(rt.reg().ID())) + require.Equal(t, 1, int(rn.ID())) + require.Equal(t, 2, int(rt.ID())) m.lowerAtomicLoadImpl(rn, rt, tc.size) require.Equal(t, tc.exp, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n") diff --git a/internal/engine/wazevo/backend/isa/arm64/lower_mem.go b/internal/engine/wazevo/backend/isa/arm64/lower_mem.go index 4842eaa382..fd0760d723 100644 --- a/internal/engine/wazevo/backend/isa/arm64/lower_mem.go +++ b/internal/engine/wazevo/backend/isa/arm64/lower_mem.go @@ -24,6 +24,14 @@ type ( addressModeKind byte ) +func resetAddressMode(a *addressMode) { + a.kind = 0 + a.rn = 0 + a.rm = 0 + a.extOp = 0 + a.imm = 0 +} + const ( // addressModeKindRegExtended takes a base register and an index register. The index register is sign/zero-extended, // and then scaled by bits(type)/8. 
@@ -140,15 +148,17 @@ func (a addressMode) format(dstSizeBits byte) (ret string) {
 	return
 }
 
-func addressModePreOrPostIndex(rn regalloc.VReg, imm int64, preIndex bool) addressMode {
+func addressModePreOrPostIndex(m *machine, rn regalloc.VReg, imm int64, preIndex bool) *addressMode {
 	if !offsetFitsInAddressModeKindRegSignedImm9(imm) {
 		panic(fmt.Sprintf("BUG: offset %#x does not fit in addressModeKindRegSignedImm9", imm))
 	}
+	mode := m.amodePool.Allocate()
 	if preIndex {
-		return addressMode{kind: addressModeKindPreIndex, rn: rn, imm: imm}
+		*mode = addressMode{kind: addressModeKindPreIndex, rn: rn, imm: imm}
 	} else {
-		return addressMode{kind: addressModeKindPostIndex, rn: rn, imm: imm}
+		*mode = addressMode{kind: addressModeKindPostIndex, rn: rn, imm: imm}
 	}
+	return mode
 }
 
 func offsetFitsInAddressModeKindRegUnsignedImm12(dstSizeInBits byte, offset int64) bool {
@@ -207,9 +217,9 @@ func (m *machine) lowerExtLoad(op ssa.Opcode, ptr ssa.Value, offset uint32, ret
 	amode := m.lowerToAddressMode(ptr, offset, size)
 	load := m.allocateInstr()
 	if signed {
-		load.asSLoad(operandNR(ret), amode, size)
+		load.asSLoad(ret, amode, size)
 	} else {
-		load.asULoad(operandNR(ret), amode, size)
+		load.asULoad(ret, amode, size)
 	}
 	m.insert(load)
 }
@@ -221,11 +231,11 @@ func (m *machine) lowerLoad(ptr ssa.Value, offset uint32, typ ssa.Type, ret ssa.
 	load := m.allocateInstr()
 	switch typ {
 	case ssa.TypeI32, ssa.TypeI64:
-		load.asULoad(operandNR(dst), amode, typ.Bits())
+		load.asULoad(dst, amode, typ.Bits())
 	case ssa.TypeF32, ssa.TypeF64:
-		load.asFpuLoad(operandNR(dst), amode, typ.Bits())
+		load.asFpuLoad(dst, amode, typ.Bits())
 	case ssa.TypeV128:
-		load.asFpuLoad(operandNR(dst), amode, 128)
+		load.asFpuLoad(dst, amode, 128)
 	default:
 		panic("TODO")
 	}
@@ -239,7 +249,7 @@ func (m *machine) lowerLoadSplat(ptr ssa.Value, offset uint32, lane ssa.VecLane,
 	m.lowerConstantI64(offsetReg, int64(offset))
 	addedBase := m.addReg64ToReg64(base, offsetReg)
 
-	rd := operandNR(m.compiler.VRegOf(ret))
+	rd := m.compiler.VRegOf(ret)
 
 	ld1r := m.allocateInstr()
 	ld1r.asVecLoad1R(rd, operandNR(addedBase), ssaLaneToArrangement(lane))
@@ -258,7 +268,7 @@ func (m *machine) lowerStore(si *ssa.Instruction) {
 }
 
 // lowerToAddressMode converts a pointer to an addressMode that can be used as an operand for load/store instructions.
-func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32, size byte) (amode addressMode) {
+func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32, size byte) (amode *addressMode) {
 	// TODO: currently the instruction selection logic doesn't support addressModeKindRegScaledExtended and
 	// addressModeKindRegScaled since collectAddends doesn't take ssa.OpcodeIshl into account. This should be fixed
 	// to support more efficient address resolution.
@@ -272,32 +282,33 @@ func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32, size byte
 // During the construction, this might emit additional instructions.
 //
 // Extracted as a separate function for easy testing.
-func (m *machine) lowerToAddressModeFromAddends(a32s *wazevoapi.Queue[addend32], a64s *wazevoapi.Queue[regalloc.VReg], size byte, offset int64) (amode addressMode) { +func (m *machine) lowerToAddressModeFromAddends(a32s *wazevoapi.Queue[addend32], a64s *wazevoapi.Queue[regalloc.VReg], size byte, offset int64) (amode *addressMode) { + amode = m.amodePool.Allocate() switch a64sExist, a32sExist := !a64s.Empty(), !a32s.Empty(); { case a64sExist && a32sExist: var base regalloc.VReg base = a64s.Dequeue() var a32 addend32 a32 = a32s.Dequeue() - amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: a32.r, extOp: a32.ext} + *amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: a32.r, extOp: a32.ext} case a64sExist && offsetFitsInAddressModeKindRegUnsignedImm12(size, offset): var base regalloc.VReg base = a64s.Dequeue() - amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: offset} + *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: offset} offset = 0 case a64sExist && offsetFitsInAddressModeKindRegSignedImm9(offset): var base regalloc.VReg base = a64s.Dequeue() - amode = addressMode{kind: addressModeKindRegSignedImm9, rn: base, imm: offset} + *amode = addressMode{kind: addressModeKindRegSignedImm9, rn: base, imm: offset} offset = 0 case a64sExist: var base regalloc.VReg base = a64s.Dequeue() if !a64s.Empty() { index := a64s.Dequeue() - amode = addressMode{kind: addressModeKindRegReg, rn: base, rm: index, extOp: extendOpUXTX /* indicates index reg is 64-bit */} + *amode = addressMode{kind: addressModeKindRegReg, rn: base, rm: index, extOp: extendOpUXTX /* indicates index reg is 64-bit */} } else { - amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0} + *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0} } case a32sExist: base32 := a32s.Dequeue() @@ -314,14 +325,14 @@ func (m *machine) lowerToAddressModeFromAddends(a32s *wazevoapi.Queue[addend32], if !a32s.Empty() { index := a32s.Dequeue() - amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: index.r, extOp: index.ext} + *amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: index.r, extOp: index.ext} } else { - amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0} + *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0} } default: // Only static offsets. 
tmpReg := m.compiler.AllocateVReg(ssa.TypeI64) m.lowerConstantI64(tmpReg, offset) - amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: tmpReg, imm: 0} + *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: tmpReg, imm: 0} offset = 0 } @@ -411,13 +422,13 @@ func (m *machine) addConstToReg64(r regalloc.VReg, c int64) (rd regalloc.VReg) { rd = m.compiler.AllocateVReg(ssa.TypeI64) alu := m.allocateInstr() if imm12Op, ok := asImm12Operand(uint64(c)); ok { - alu.asALU(aluOpAdd, operandNR(rd), operandNR(r), imm12Op, true) + alu.asALU(aluOpAdd, rd, operandNR(r), imm12Op, true) } else if imm12Op, ok = asImm12Operand(uint64(-c)); ok { - alu.asALU(aluOpSub, operandNR(rd), operandNR(r), imm12Op, true) + alu.asALU(aluOpSub, rd, operandNR(r), imm12Op, true) } else { tmp := m.compiler.AllocateVReg(ssa.TypeI64) m.load64bitConst(c, tmp) - alu.asALU(aluOpAdd, operandNR(rd), operandNR(r), operandNR(tmp), true) + alu.asALU(aluOpAdd, rd, operandNR(r), operandNR(tmp), true) } m.insert(alu) return @@ -426,7 +437,7 @@ func (m *machine) addConstToReg64(r regalloc.VReg, c int64) (rd regalloc.VReg) { func (m *machine) addReg64ToReg64(rn, rm regalloc.VReg) (rd regalloc.VReg) { rd = m.compiler.AllocateVReg(ssa.TypeI64) alu := m.allocateInstr() - alu.asALU(aluOpAdd, operandNR(rd), operandNR(rn), operandNR(rm), true) + alu.asALU(aluOpAdd, rd, operandNR(rn), operandNR(rm), true) m.insert(alu) return } @@ -434,7 +445,7 @@ func (m *machine) addReg64ToReg64(rn, rm regalloc.VReg) (rd regalloc.VReg) { func (m *machine) addRegToReg64Ext(rn, rm regalloc.VReg, ext extendOp) (rd regalloc.VReg) { rd = m.compiler.AllocateVReg(ssa.TypeI64) alu := m.allocateInstr() - alu.asALU(aluOpAdd, operandNR(rd), operandNR(rn), operandER(rm, ext, 64), true) + alu.asALU(aluOpAdd, rd, operandNR(rn), operandER(rm, ext, 64), true) m.insert(alu) return } diff --git a/internal/engine/wazevo/backend/isa/arm64/lower_mem_test.go b/internal/engine/wazevo/backend/isa/arm64/lower_mem_test.go index 82ab297f3a..e7a054d75a 100644 --- a/internal/engine/wazevo/backend/isa/arm64/lower_mem_test.go +++ b/internal/engine/wazevo/backend/isa/arm64/lower_mem_test.go @@ -807,7 +807,7 @@ func TestMachine_lowerToAddressModeFromAddends(t *testing.T) { } actual := m.lowerToAddressModeFromAddends(&a32s, &a64s, tc.dstSizeInBits, tc.offset) require.Equal(t, strings.Join(tc.insts, "\n"), formatEmittedInstructionsInCurrentBlock(m)) - require.Equal(t, tc.exp, actual, actual.format(tc.dstSizeInBits)) + require.Equal(t, &tc.exp, actual, actual.format(tc.dstSizeInBits)) }) } } diff --git a/internal/engine/wazevo/backend/isa/arm64/machine.go b/internal/engine/wazevo/backend/isa/arm64/machine.go index b435d9ba96..311b34f4a1 100644 --- a/internal/engine/wazevo/backend/isa/arm64/machine.go +++ b/internal/engine/wazevo/backend/isa/arm64/machine.go @@ -21,6 +21,8 @@ type ( regAlloc regalloc.Allocator regAllocFn *backend.RegAllocFunction[*instruction, *machine] + amodePool wazevoapi.Pool[addressMode] + // addendsWorkQueue is used during address lowering, defined here for reuse. 
addendsWorkQueue wazevoapi.Queue[ssa.Value] addends32 wazevoapi.Queue[addend32] @@ -105,6 +107,7 @@ func NewBackend() backend.Machine { spillSlots: make(map[regalloc.VRegID]int64), executableContext: newExecutableContext(), regAlloc: regalloc.NewAllocator(regInfo), + amodePool: wazevoapi.NewPool[addressMode](resetAddressMode), } return m } @@ -149,6 +152,7 @@ func (m *machine) Reset() { m.maxRequiredStackSizeForCalls = 0 m.executableContext.Reset() m.jmpTableTargets = m.jmpTableTargets[:0] + m.amodePool.Reset() } // SetCurrentABI implements backend.Machine SetCurrentABI. @@ -209,7 +213,7 @@ func (m *machine) allocateNop() *instruction { } func (m *machine) resolveAddressingMode(arg0offset, ret0offset int64, i *instruction) { - amode := &i.amode + amode := i.getAmode() switch amode.kind { case addressModeKindResultStackSpace: amode.imm += ret0offset diff --git a/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go b/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go index 466fac4640..d9032f9218 100644 --- a/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go +++ b/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go @@ -70,7 +70,7 @@ func (m *machine) setupPrologue() { // +-----------------+ <----- SP // (low address) // - _amode := addressModePreOrPostIndex(spVReg, + _amode := addressModePreOrPostIndex(m, spVReg, -16, // stack pointer must be 16-byte aligned. true, // Decrement before store. ) @@ -159,7 +159,7 @@ func (m *machine) createReturnAddrAndSizeOfArgRetSlot(cur *instruction) *instruc sizeOfArgRetReg = tmpRegVReg subSp := m.allocateInstr() - subSp.asALU(aluOpSub, operandNR(spVReg), operandNR(spVReg), operandNR(sizeOfArgRetReg), true) + subSp.asALU(aluOpSub, spVReg, operandNR(spVReg), operandNR(sizeOfArgRetReg), true) cur = linkInstr(cur, subSp) } else { sizeOfArgRetReg = xzrVReg @@ -168,7 +168,7 @@ func (m *machine) createReturnAddrAndSizeOfArgRetSlot(cur *instruction) *instruc // Saves the return address (lr) and the size_of_arg_ret below the SP. // size_of_arg_ret is used for stack unwinding. pstr := m.allocateInstr() - amode := addressModePreOrPostIndex(spVReg, -16, true /* decrement before store */) + amode := addressModePreOrPostIndex(m, spVReg, -16, true /* decrement before store */) pstr.asStorePair64(lrVReg, sizeOfArgRetReg, amode) cur = linkInstr(cur, pstr) return cur @@ -182,7 +182,7 @@ func (m *machine) createFrameSizeSlot(cur *instruction, s int64) *instruction { } else { frameSizeReg = xzrVReg } - _amode := addressModePreOrPostIndex(spVReg, + _amode := addressModePreOrPostIndex(m, spVReg, -16, // stack pointer must be 16-byte aligned. true, // Decrement before store. ) @@ -213,7 +213,7 @@ func (m *machine) postRegAlloc() { m.executableContext.PendingInstructions = m.executableContext.PendingInstructions[:0] default: // Removes the redundant copy instruction. - if cur.IsCopy() && cur.rn.realReg() == cur.rd.realReg() { + if cur.IsCopy() && cur.rn.realReg() == cur.rd.RealReg() { prev, next := cur.prev, cur.next // Remove the copy instruction. prev.next = next @@ -286,16 +286,16 @@ func (m *machine) setupEpilogueAfter(cur *instruction) { for i := range m.clobberedRegs { vr := m.clobberedRegs[l-i] // reverse order to restore. load := m.allocateInstr() - amode := addressModePreOrPostIndex(spVReg, + amode := addressModePreOrPostIndex(m, spVReg, 16, // stack pointer must be 16-byte aligned. false, // Increment after store. ) // TODO: pair loads to reduce the number of instructions. 
switch regTypeToRegisterSizeInBits(vr.RegType()) { case 64: // save int reg. - load.asULoad(operandNR(vr), amode, 64) + load.asULoad(vr, amode, 64) case 128: // save vector reg. - load.asFpuLoad(operandNR(vr), amode, 128) + load.asFpuLoad(vr, amode, 128) } cur = linkInstr(cur, load) } @@ -317,8 +317,8 @@ func (m *machine) setupEpilogueAfter(cur *instruction) { // SP----> +-----------------+ ldr := m.allocateInstr() - ldr.asULoad(operandNR(lrVReg), - addressModePreOrPostIndex(spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64) + ldr.asULoad(lrVReg, + addressModePreOrPostIndex(m, spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64) cur = linkInstr(cur, ldr) if s := int64(m.currentABI.AlignedArgResultStackSlotSize()); s > 0 { @@ -351,14 +351,14 @@ func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instructi if immm12op, ok := asImm12Operand(uint64(requiredStackSize)); ok { // sub tmp, sp, #requiredStackSize sub := m.allocateInstr() - sub.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(spVReg), immm12op, true) + sub.asALU(aluOpSub, tmpRegVReg, operandNR(spVReg), immm12op, true) cur = linkInstr(cur, sub) } else { // This case, we first load the requiredStackSize into the temporary register, cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, requiredStackSize) // Then subtract it. sub := m.allocateInstr() - sub.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(spVReg), operandNR(tmpRegVReg), true) + sub.asALU(aluOpSub, tmpRegVReg, operandNR(spVReg), operandNR(tmpRegVReg), true) cur = linkInstr(cur, sub) } @@ -366,16 +366,18 @@ func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instructi // ldr tmp2, [executionContext #StackBottomPtr] ldr := m.allocateInstr() - ldr.asULoad(operandNR(tmp2), addressMode{ + amode := m.amodePool.Allocate() + *amode = addressMode{ kind: addressModeKindRegUnsignedImm12, rn: x0VReg, // execution context is always the first argument. imm: wazevoapi.ExecutionContextOffsetStackBottomPtr.I64(), - }, 64) + } + ldr.asULoad(tmp2, amode, 64) cur = linkInstr(cur, ldr) // subs xzr, tmp, tmp2 subs := m.allocateInstr() - subs.asALU(aluOpSubS, operandNR(xzrVReg), operandNR(tmpRegVReg), operandNR(tmp2), true) + subs.asALU(aluOpSubS, xzrVReg, operandNR(tmpRegVReg), operandNR(tmp2), true) cur = linkInstr(cur, subs) // b.ge #imm @@ -388,22 +390,25 @@ func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instructi // First load the requiredStackSize into the temporary register, cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, requiredStackSize) setRequiredStackSize := m.allocateInstr() - setRequiredStackSize.asStore(operandNR(tmpRegVReg), - addressMode{ - kind: addressModeKindRegUnsignedImm12, - // Execution context is always the first argument. - rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetStackGrowRequiredSize.I64(), - }, 64) + amode := m.amodePool.Allocate() + *amode = addressMode{ + kind: addressModeKindRegUnsignedImm12, + // Execution context is always the first argument. 
+			rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetStackGrowRequiredSize.I64(),
+		}
+		setRequiredStackSize.asStore(operandNR(tmpRegVReg), amode, 64)
 		cur = linkInstr(cur, setRequiredStackSize)
 	}
 
 	ldrAddress := m.allocateInstr()
-	ldrAddress.asULoad(operandNR(tmpRegVReg), addressMode{
+	amode2 := m.amodePool.Allocate()
+	*amode2 = addressMode{
 		kind: addressModeKindRegUnsignedImm12,
 		rn:   x0VReg, // execution context is always the first argument
 		imm:  wazevoapi.ExecutionContextOffsetStackGrowCallTrampolineAddress.I64(),
-	}, 64)
+	}
+	ldrAddress.asULoad(tmpRegVReg, amode2, 64)
 	cur = linkInstr(cur, ldrAddress)
 
 	// Then jumps to the stack grow call sequence's address, meaning
diff --git a/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go b/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go
index 1c8793b73d..c7eb92cc20 100644
--- a/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go
+++ b/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go
@@ -91,7 +91,7 @@ func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, aft
 	}
 
 	offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size())
-	var amode addressMode
+	var amode *addressMode
 	cur, amode = m.resolveAddressModeForOffsetAndInsert(cur, offsetFromSP, typ.Bits(), spVReg, true)
 	store := m.allocateInstr()
 	store.asStore(operandNR(v), amode, typ.Bits())
@@ -116,16 +116,16 @@ func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, af
 	}
 
 	offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size())
-	var amode addressMode
+	var amode *addressMode
 	cur, amode = m.resolveAddressModeForOffsetAndInsert(cur, offsetFromSP, typ.Bits(), spVReg, true)
 	load := m.allocateInstr()
 	switch typ {
 	case ssa.TypeI32, ssa.TypeI64:
-		load.asULoad(operandNR(v), amode, typ.Bits())
+		load.asULoad(v, amode, typ.Bits())
 	case ssa.TypeF32, ssa.TypeF64:
-		load.asFpuLoad(operandNR(v), amode, typ.Bits())
+		load.asFpuLoad(v, amode, typ.Bits())
 	case ssa.TypeV128:
-		load.asFpuLoad(operandNR(v), amode, 128)
+		load.asFpuLoad(v, amode, 128)
 	default:
 		panic("TODO")
 	}
diff --git a/internal/engine/wazevo/backend/isa/arm64/machine_test.go b/internal/engine/wazevo/backend/isa/arm64/machine_test.go
index a585f1fba7..882b7384ed 100644
--- a/internal/engine/wazevo/backend/isa/arm64/machine_test.go
+++ b/internal/engine/wazevo/backend/isa/arm64/machine_test.go
@@ -9,39 +9,43 @@ import (
 )
 
 func TestMachine_resolveAddressingMode(t *testing.T) {
+	m := NewBackend().(*machine)
 	t.Run("imm12/arg", func(t *testing.T) {
-		m := &machine{}
 		i := &instruction{}
-		i.asULoad(operandNR(x17VReg), addressMode{
+		amode := m.amodePool.Allocate()
+		*amode = addressMode{
 			kind: addressModeKindArgStackSpace,
 			rn:   spVReg,
 			imm:  128,
-		}, 64)
+		}
+		i.asULoad(x17VReg, amode, 64)
 		m.resolveAddressingMode(1024, 0, i)
-		require.Equal(t, addressModeKindRegUnsignedImm12, i.amode.kind)
-		require.Equal(t, int64(128+1024), i.amode.imm)
+		require.Equal(t, addressModeKindRegUnsignedImm12, i.getAmode().kind)
+		require.Equal(t, int64(128+1024), i.getAmode().imm)
 	})
 	t.Run("imm12/result", func(t *testing.T) {
-		m := &machine{}
 		i := &instruction{}
-		i.asULoad(operandNR(x17VReg), addressMode{
+		amode := m.amodePool.Allocate()
+		*amode = addressMode{
 			kind: addressModeKindResultStackSpace,
 			rn:   spVReg,
 			imm:  128,
-		}, 64)
+		}
+		i.asULoad(x17VReg, amode, 64)
 		m.resolveAddressingMode(0, 256, i)
-		require.Equal(t, addressModeKindRegUnsignedImm12, i.amode.kind)
-		require.Equal(t, int64(128+256), i.amode.imm)
+		require.Equal(t, addressModeKindRegUnsignedImm12, i.getAmode().kind)
+		require.Equal(t, int64(128+256), i.getAmode().imm)
 	})
 	t.Run("tmp reg", func(t *testing.T) {
-		m := &machine{executableContext: newExecutableContext()}
 		root := &instruction{kind: udf}
 		i := &instruction{prev: root}
-		i.asULoad(operandNR(x17VReg), addressMode{
+		amode := m.amodePool.Allocate()
+		*amode = addressMode{
 			kind: addressModeKindResultStackSpace,
 			rn:   spVReg,
-		}, 64)
+		}
+		i.asULoad(x17VReg, amode, 64)
 		m.resolveAddressingMode(0, 0x40000001, i)
 		m.executableContext.RootInstr = root
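
The pattern adopted throughout this patch is to allocate addressMode objects from m.amodePool (a wazevoapi.Pool[addressMode] constructed with the resetAddressMode callback and cleared in machine.Reset) and then overwrite the pooled element in place with *amode = addressMode{...}, so instructions can hold a stable *addressMode without a per-instruction heap allocation. Below is a minimal sketch of that allocate/overwrite/Reset contract; the pool, newPool, and addrMode names are invented for illustration, and the real wazevoapi.Pool may be implemented differently (for example, it could grow in fixed-size chunks).

package main

import "fmt"

// pool is a simplified stand-in for a generic object pool: Allocate returns a
// pointer to a reset element that stays valid until Reset, and Reset recycles
// every element at once for the next compilation pass.
type pool[T any] struct {
	allocated []*T
	resetFn   func(*T)
	next      int
}

func newPool[T any](resetFn func(*T)) pool[T] {
	return pool[T]{resetFn: resetFn}
}

// Allocate hands out a pointer whose contents have been reset; the caller is
// expected to overwrite it in place, e.g. *amode = addrMode{...}.
func (p *pool[T]) Allocate() *T {
	if p.next == len(p.allocated) {
		p.allocated = append(p.allocated, new(T))
	}
	v := p.allocated[p.next]
	p.next++
	p.resetFn(v)
	return v
}

// Reset marks every previously allocated element as reusable.
func (p *pool[T]) Reset() { p.next = 0 }

// addrMode mirrors only the shape of the backend's addressMode, for illustration.
type addrMode struct {
	kind byte
	imm  int64
}

func main() {
	p := newPool[addrMode](func(a *addrMode) { *a = addrMode{} })

	// Allocate, then overwrite in place: the same shape used at every former
	// addressMode{...} literal in the patch above.
	amode := p.Allocate()
	*amode = addrMode{kind: 1, imm: 128}
	fmt.Println(amode.kind, amode.imm) // 1 128

	// Between compilations the whole pool is recycled rather than freed.
	p.Reset()
}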