Skip to content

Commit 3166647

Browse files
committed
[AVR] Fix atomicrmw result value
This patch fixes the atomicrmw result value to be the value before the operation instead of the value after the operation. This was a bug, left as a FIXME in the code (see https://reviews.llvm.org/D97127). From the LangRef: > The contents of memory at the location specified by the <pointer> > operand are atomically read, modified, and written back. The original > value at the location is returned. Doing this expansion early allows the register allocator to arrange registers in such a way that commutable operations are simply swapped around as needed, which results in shorter code while still being correct. Differential Revision: https://reviews.llvm.org/D117725
1 parent a2601c9 commit 3166647

File tree

9 files changed

+150
-164
lines changed

9 files changed

+150
-164
lines changed

llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp

Lines changed: 8 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,6 @@ class AVRExpandPseudo : public MachineFunctionPass {
5454
const Register SCRATCH_REGISTER = AVR::R0;
5555
/// The register that will always contain zero.
5656
const Register ZERO_REGISTER = AVR::R1;
57-
/// The IO address of the status register.
58-
const unsigned SREG_ADDR = 0x3f;
5957

6058
bool expandMBB(Block &MBB);
6159
bool expandMI(Block &MBB, BlockIt MBBI);
@@ -86,9 +84,6 @@ class AVRExpandPseudo : public MachineFunctionPass {
8684

8785
bool expandAtomicBinaryOp(unsigned Opcode, Block &MBB, BlockIt MBBI);
8886

89-
bool expandAtomicArithmeticOp(unsigned MemOpcode, unsigned ArithOpcode,
90-
Block &MBB, BlockIt MBBI);
91-
9287
/// Specific shift implementation.
9388
bool expandLSLB7Rd(Block &MBB, BlockIt MBBI);
9489
bool expandLSRB7Rd(Block &MBB, BlockIt MBBI);
@@ -917,21 +912,23 @@ bool AVRExpandPseudo::expand<AVR::ELPMWRdZPi>(Block &MBB, BlockIt MBBI) {
917912

918913
template <typename Func>
919914
bool AVRExpandPseudo::expandAtomic(Block &MBB, BlockIt MBBI, Func f) {
920-
// Remove the pseudo instruction.
921915
MachineInstr &MI = *MBBI;
916+
const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>();
922917

923918
// Store the SREG.
924919
buildMI(MBB, MBBI, AVR::INRdA)
925920
.addReg(SCRATCH_REGISTER, RegState::Define)
926-
.addImm(SREG_ADDR);
921+
.addImm(STI.getIORegSREG());
927922

928923
// Disable exceptions.
929924
buildMI(MBB, MBBI, AVR::BCLRs).addImm(7); // CLI
930925

931926
f(MI);
932927

933928
// Restore the status reg.
934-
buildMI(MBB, MBBI, AVR::OUTARr).addImm(SREG_ADDR).addReg(SCRATCH_REGISTER);
929+
buildMI(MBB, MBBI, AVR::OUTARr)
930+
.addImm(STI.getIORegSREG())
931+
.addReg(SCRATCH_REGISTER);
935932

936933
MI.eraseFromParent();
937934
return true;
@@ -955,31 +952,6 @@ bool AVRExpandPseudo::expandAtomicBinaryOp(unsigned Opcode, Block &MBB,
955952
return expandAtomicBinaryOp(Opcode, MBB, MBBI, [](MachineInstr &MI) {});
956953
}
957954

958-
bool AVRExpandPseudo::expandAtomicArithmeticOp(unsigned Width,
959-
unsigned ArithOpcode, Block &MBB,
960-
BlockIt MBBI) {
961-
return expandAtomic(MBB, MBBI, [&](MachineInstr &MI) {
962-
auto DstReg = MI.getOperand(0).getReg();
963-
auto PtrOp = MI.getOperand(1);
964-
auto SrcReg = MI.getOperand(2).getReg();
965-
966-
unsigned LoadOpcode = (Width == 8) ? AVR::LDRdPtr : AVR::LDWRdPtr;
967-
unsigned StoreOpcode = (Width == 8) ? AVR::STPtrRr : AVR::STWPtrRr;
968-
969-
// FIXME: this returns the new value (after the operation), not the old
970-
// value as the atomicrmw instruction is supposed to do!
971-
972-
// Create the load
973-
buildMI(MBB, MBBI, LoadOpcode, DstReg).addReg(PtrOp.getReg());
974-
975-
// Create the arithmetic op
976-
buildMI(MBB, MBBI, ArithOpcode, DstReg).addReg(DstReg).addReg(SrcReg);
977-
978-
// Create the store
979-
buildMI(MBB, MBBI, StoreOpcode).add(PtrOp).addReg(DstReg);
980-
});
981-
}
982-
983955
Register AVRExpandPseudo::scavengeGPR8(MachineInstr &MI) {
984956
MachineBasicBlock &MBB = *MI.getParent();
985957
RegScavenger RS;
@@ -1025,56 +997,6 @@ bool AVRExpandPseudo::expand<AVR::AtomicStore16>(Block &MBB, BlockIt MBBI) {
1025997
return expandAtomicBinaryOp(AVR::STWPtrRr, MBB, MBBI);
1026998
}
1027999

1028-
template <>
1029-
bool AVRExpandPseudo::expand<AVR::AtomicLoadAdd8>(Block &MBB, BlockIt MBBI) {
1030-
return expandAtomicArithmeticOp(8, AVR::ADDRdRr, MBB, MBBI);
1031-
}
1032-
1033-
template <>
1034-
bool AVRExpandPseudo::expand<AVR::AtomicLoadAdd16>(Block &MBB, BlockIt MBBI) {
1035-
return expandAtomicArithmeticOp(16, AVR::ADDWRdRr, MBB, MBBI);
1036-
}
1037-
1038-
template <>
1039-
bool AVRExpandPseudo::expand<AVR::AtomicLoadSub8>(Block &MBB, BlockIt MBBI) {
1040-
return expandAtomicArithmeticOp(8, AVR::SUBRdRr, MBB, MBBI);
1041-
}
1042-
1043-
template <>
1044-
bool AVRExpandPseudo::expand<AVR::AtomicLoadSub16>(Block &MBB, BlockIt MBBI) {
1045-
return expandAtomicArithmeticOp(16, AVR::SUBWRdRr, MBB, MBBI);
1046-
}
1047-
1048-
template <>
1049-
bool AVRExpandPseudo::expand<AVR::AtomicLoadAnd8>(Block &MBB, BlockIt MBBI) {
1050-
return expandAtomicArithmeticOp(8, AVR::ANDRdRr, MBB, MBBI);
1051-
}
1052-
1053-
template <>
1054-
bool AVRExpandPseudo::expand<AVR::AtomicLoadAnd16>(Block &MBB, BlockIt MBBI) {
1055-
return expandAtomicArithmeticOp(16, AVR::ANDWRdRr, MBB, MBBI);
1056-
}
1057-
1058-
template <>
1059-
bool AVRExpandPseudo::expand<AVR::AtomicLoadOr8>(Block &MBB, BlockIt MBBI) {
1060-
return expandAtomicArithmeticOp(8, AVR::ORRdRr, MBB, MBBI);
1061-
}
1062-
1063-
template <>
1064-
bool AVRExpandPseudo::expand<AVR::AtomicLoadOr16>(Block &MBB, BlockIt MBBI) {
1065-
return expandAtomicArithmeticOp(16, AVR::ORWRdRr, MBB, MBBI);
1066-
}
1067-
1068-
template <>
1069-
bool AVRExpandPseudo::expand<AVR::AtomicLoadXor8>(Block &MBB, BlockIt MBBI) {
1070-
return expandAtomicArithmeticOp(8, AVR::EORRdRr, MBB, MBBI);
1071-
}
1072-
1073-
template <>
1074-
bool AVRExpandPseudo::expand<AVR::AtomicLoadXor16>(Block &MBB, BlockIt MBBI) {
1075-
return expandAtomicArithmeticOp(16, AVR::EORWRdRr, MBB, MBBI);
1076-
}
1077-
10781000
template <>
10791001
bool AVRExpandPseudo::expand<AVR::AtomicFence>(Block &MBB, BlockIt MBBI) {
10801002
// On AVR, there is only one core and so atomic fences do nothing.
@@ -2256,6 +2178,7 @@ bool AVRExpandPseudo::expand<AVR::SPREAD>(Block &MBB, BlockIt MBBI) {
22562178

22572179
template <>
22582180
bool AVRExpandPseudo::expand<AVR::SPWRITE>(Block &MBB, BlockIt MBBI) {
2181+
const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>();
22592182
MachineInstr &MI = *MBBI;
22602183
Register SrcLoReg, SrcHiReg;
22612184
Register SrcReg = MI.getOperand(1).getReg();
@@ -2265,7 +2188,7 @@ bool AVRExpandPseudo::expand<AVR::SPWRITE>(Block &MBB, BlockIt MBBI) {
22652188

22662189
buildMI(MBB, MBBI, AVR::INRdA)
22672190
.addReg(AVR::R0, RegState::Define)
2268-
.addImm(SREG_ADDR)
2191+
.addImm(STI.getIORegSREG())
22692192
.setMIFlags(Flags);
22702193

22712194
buildMI(MBB, MBBI, AVR::BCLRs).addImm(0x07).setMIFlags(Flags);
@@ -2276,7 +2199,7 @@ bool AVRExpandPseudo::expand<AVR::SPWRITE>(Block &MBB, BlockIt MBBI) {
22762199
.setMIFlags(Flags);
22772200

22782201
buildMI(MBB, MBBI, AVR::OUTARr)
2279-
.addImm(SREG_ADDR)
2202+
.addImm(STI.getIORegSREG())
22802203
.addReg(AVR::R0, RegState::Kill)
22812204
.setMIFlags(Flags);
22822205

@@ -2330,16 +2253,6 @@ bool AVRExpandPseudo::expandMI(Block &MBB, BlockIt MBBI) {
23302253
EXPAND(AVR::AtomicLoad16);
23312254
EXPAND(AVR::AtomicStore8);
23322255
EXPAND(AVR::AtomicStore16);
2333-
EXPAND(AVR::AtomicLoadAdd8);
2334-
EXPAND(AVR::AtomicLoadAdd16);
2335-
EXPAND(AVR::AtomicLoadSub8);
2336-
EXPAND(AVR::AtomicLoadSub16);
2337-
EXPAND(AVR::AtomicLoadAnd8);
2338-
EXPAND(AVR::AtomicLoadAnd16);
2339-
EXPAND(AVR::AtomicLoadOr8);
2340-
EXPAND(AVR::AtomicLoadOr16);
2341-
EXPAND(AVR::AtomicLoadXor8);
2342-
EXPAND(AVR::AtomicLoadXor16);
23432256
EXPAND(AVR::AtomicFence);
23442257
EXPAND(AVR::STSWKRr);
23452258
EXPAND(AVR::STWPtrRr);

llvm/lib/Target/AVR/AVRFrameLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ void AVRFrameLowering::emitPrologue(MachineFunction &MF,
7373
.setMIFlag(MachineInstr::FrameSetup);
7474

7575
BuildMI(MBB, MBBI, DL, TII.get(AVR::INRdA), AVR::R0)
76-
.addImm(0x3f)
76+
.addImm(STI.getIORegSREG())
7777
.setMIFlag(MachineInstr::FrameSetup);
7878
BuildMI(MBB, MBBI, DL, TII.get(AVR::PUSHRr))
7979
.addReg(AVR::R0, RegState::Kill)
@@ -144,7 +144,7 @@ static void restoreStatusRegister(MachineFunction &MF, MachineBasicBlock &MBB) {
144144
if (AFI->isInterruptOrSignalHandler()) {
145145
BuildMI(MBB, MBBI, DL, TII.get(AVR::POPRd), AVR::R0);
146146
BuildMI(MBB, MBBI, DL, TII.get(AVR::OUTARr))
147-
.addImm(0x3f)
147+
.addImm(STI.getIORegSREG())
148148
.addReg(AVR::R0, RegState::Kill);
149149
BuildMI(MBB, MBBI, DL, TII.get(AVR::POPWRd), AVR::R1R0);
150150
}

llvm/lib/Target/AVR/AVRISelLowering.cpp

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1707,6 +1707,60 @@ AVRTargetLowering::insertCopyR1(MachineInstr &MI, MachineBasicBlock *BB) const {
17071707
return BB;
17081708
}
17091709

1710+
// Lower atomicrmw operation to disable interrupts, do operation, and restore
1711+
// interrupts. This works because all AVR microcontrollers are single core.
1712+
MachineBasicBlock *AVRTargetLowering::insertAtomicArithmeticOp(
1713+
MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode, int Width) const {
1714+
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
1715+
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
1716+
MachineBasicBlock::iterator I(MI);
1717+
const Register SCRATCH_REGISTER = AVR::R0;
1718+
DebugLoc dl = MI.getDebugLoc();
1719+
1720+
// Example instruction sequence, for an atomic 8-bit add:
1721+
// ldi r25, 5
1722+
// in r0, SREG
1723+
// cli
1724+
// ld r24, X
1725+
// add r25, r24
1726+
// st X, r25
1727+
// out SREG, r0
1728+
1729+
const TargetRegisterClass *RC =
1730+
(Width == 8) ? &AVR::GPR8RegClass : &AVR::DREGSRegClass;
1731+
unsigned LoadOpcode = (Width == 8) ? AVR::LDRdPtr : AVR::LDWRdPtr;
1732+
unsigned StoreOpcode = (Width == 8) ? AVR::STPtrRr : AVR::STWPtrRr;
1733+
1734+
// Disable interrupts.
1735+
BuildMI(*BB, I, dl, TII.get(AVR::INRdA), SCRATCH_REGISTER)
1736+
.addImm(Subtarget.getIORegSREG());
1737+
BuildMI(*BB, I, dl, TII.get(AVR::BCLRs)).addImm(7);
1738+
1739+
// Load the original value.
1740+
BuildMI(*BB, I, dl, TII.get(LoadOpcode), MI.getOperand(0).getReg())
1741+
.add(MI.getOperand(1));
1742+
1743+
// Do the arithmetic operation.
1744+
Register Result = MRI.createVirtualRegister(RC);
1745+
BuildMI(*BB, I, dl, TII.get(Opcode), Result)
1746+
.addReg(MI.getOperand(0).getReg())
1747+
.add(MI.getOperand(2));
1748+
1749+
// Store the result.
1750+
BuildMI(*BB, I, dl, TII.get(StoreOpcode))
1751+
.add(MI.getOperand(1))
1752+
.addReg(Result);
1753+
1754+
// Restore interrupts.
1755+
BuildMI(*BB, I, dl, TII.get(AVR::OUTARr))
1756+
.addImm(Subtarget.getIORegSREG())
1757+
.addReg(SCRATCH_REGISTER);
1758+
1759+
// Remove the pseudo instruction.
1760+
MI.eraseFromParent();
1761+
return BB;
1762+
}
1763+
17101764
MachineBasicBlock *
17111765
AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
17121766
MachineBasicBlock *MBB) const {
@@ -1731,6 +1785,26 @@ AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
17311785
return insertMul(MI, MBB);
17321786
case AVR::CopyR1:
17331787
return insertCopyR1(MI, MBB);
1788+
case AVR::AtomicLoadAdd8:
1789+
return insertAtomicArithmeticOp(MI, MBB, AVR::ADDRdRr, 8);
1790+
case AVR::AtomicLoadAdd16:
1791+
return insertAtomicArithmeticOp(MI, MBB, AVR::ADDWRdRr, 16);
1792+
case AVR::AtomicLoadSub8:
1793+
return insertAtomicArithmeticOp(MI, MBB, AVR::SUBRdRr, 8);
1794+
case AVR::AtomicLoadSub16:
1795+
return insertAtomicArithmeticOp(MI, MBB, AVR::SUBWRdRr, 16);
1796+
case AVR::AtomicLoadAnd8:
1797+
return insertAtomicArithmeticOp(MI, MBB, AVR::ANDRdRr, 8);
1798+
case AVR::AtomicLoadAnd16:
1799+
return insertAtomicArithmeticOp(MI, MBB, AVR::ANDWRdRr, 16);
1800+
case AVR::AtomicLoadOr8:
1801+
return insertAtomicArithmeticOp(MI, MBB, AVR::ORRdRr, 8);
1802+
case AVR::AtomicLoadOr16:
1803+
return insertAtomicArithmeticOp(MI, MBB, AVR::ORWRdRr, 16);
1804+
case AVR::AtomicLoadXor8:
1805+
return insertAtomicArithmeticOp(MI, MBB, AVR::EORRdRr, 8);
1806+
case AVR::AtomicLoadXor16:
1807+
return insertAtomicArithmeticOp(MI, MBB, AVR::EORWRdRr, 16);
17341808
}
17351809

17361810
assert((Opc == AVR::Select16 || Opc == AVR::Select8) &&

llvm/lib/Target/AVR/AVRISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,9 @@ class AVRTargetLowering : public TargetLowering {
189189
MachineBasicBlock *insertMul(MachineInstr &MI, MachineBasicBlock *BB) const;
190190
MachineBasicBlock *insertCopyR1(MachineInstr &MI,
191191
MachineBasicBlock *BB) const;
192+
MachineBasicBlock *insertAtomicArithmeticOp(MachineInstr &MI,
193+
MachineBasicBlock *BB,
194+
unsigned Opcode, int Width) const;
192195
};
193196

194197
} // end namespace llvm

llvm/lib/Target/AVR/AVRInstrInfo.td

Lines changed: 26 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1446,27 +1446,14 @@ class AtomicStore<PatFrag Op, RegisterClass DRC, RegisterClass PTRRC>
14461446
: $rd, DRC
14471447
: $rr)]>;
14481448

1449-
let Constraints =
1450-
"@earlyclobber $rd" in class AtomicLoadOp<PatFrag Op, RegisterClass DRC,
1451-
RegisterClass PTRRC>
1452-
: Pseudo<(outs DRC
1453-
: $rd),
1454-
(ins PTRRC
1455-
: $rr, DRC
1456-
: $operand),
1457-
"atomic_op", [(set DRC
1458-
: $rd, (Op i16
1459-
: $rr, DRC
1460-
: $operand))]>;
1461-
1462-
// FIXME: I think 16-bit atomic binary ops need to mark
1463-
// r0 as clobbered.
1449+
class AtomicLoadOp<PatFrag Op, RegisterClass DRC, RegisterClass PTRRC>
1450+
: Pseudo<(outs DRC:$rd),
1451+
(ins PTRRC:$rr, DRC:$operand),
1452+
"atomic_op", [(set DRC:$rd, (Op i16:$rr, DRC:$operand))]>;
14641453

14651454
// Atomic instructions
14661455
// ===================
14671456
//
1468-
// These are all expanded by AVRExpandPseudoInsts
1469-
//
14701457
// 8-bit operations can use any pointer register because
14711458
// they are expanded directly into an LD/ST instruction.
14721459
//
@@ -1482,16 +1469,18 @@ def AtomicStore16 : AtomicStore<atomic_store_16, DREGS, PTRDISPREGS>;
14821469
class AtomicLoadOp8<PatFrag Op> : AtomicLoadOp<Op, GPR8, PTRREGS>;
14831470
class AtomicLoadOp16<PatFrag Op> : AtomicLoadOp<Op, DREGS, PTRDISPREGS>;
14841471

1485-
def AtomicLoadAdd8 : AtomicLoadOp8<atomic_load_add_8>;
1486-
def AtomicLoadAdd16 : AtomicLoadOp16<atomic_load_add_16>;
1487-
def AtomicLoadSub8 : AtomicLoadOp8<atomic_load_sub_8>;
1488-
def AtomicLoadSub16 : AtomicLoadOp16<atomic_load_sub_16>;
1489-
def AtomicLoadAnd8 : AtomicLoadOp8<atomic_load_and_8>;
1490-
def AtomicLoadAnd16 : AtomicLoadOp16<atomic_load_and_16>;
1491-
def AtomicLoadOr8 : AtomicLoadOp8<atomic_load_or_8>;
1492-
def AtomicLoadOr16 : AtomicLoadOp16<atomic_load_or_16>;
1493-
def AtomicLoadXor8 : AtomicLoadOp8<atomic_load_xor_8>;
1494-
def AtomicLoadXor16 : AtomicLoadOp16<atomic_load_xor_16>;
1472+
let usesCustomInserter=1 in {
1473+
def AtomicLoadAdd8 : AtomicLoadOp8<atomic_load_add_8>;
1474+
def AtomicLoadAdd16 : AtomicLoadOp16<atomic_load_add_16>;
1475+
def AtomicLoadSub8 : AtomicLoadOp8<atomic_load_sub_8>;
1476+
def AtomicLoadSub16 : AtomicLoadOp16<atomic_load_sub_16>;
1477+
def AtomicLoadAnd8 : AtomicLoadOp8<atomic_load_and_8>;
1478+
def AtomicLoadAnd16 : AtomicLoadOp16<atomic_load_and_16>;
1479+
def AtomicLoadOr8 : AtomicLoadOp8<atomic_load_or_8>;
1480+
def AtomicLoadOr16 : AtomicLoadOp16<atomic_load_or_16>;
1481+
def AtomicLoadXor8 : AtomicLoadOp8<atomic_load_xor_8>;
1482+
def AtomicLoadXor16 : AtomicLoadOp16<atomic_load_xor_16>;
1483+
}
14951484
def AtomicFence
14961485
: Pseudo<(outs), (ins), "atomic_fence", [(atomic_fence timm, timm)]>;
14971486

@@ -2122,15 +2111,17 @@ def ROL : InstAlias<"rol\t$rd", (ADCRdRr GPR8 : $rd, GPR8 : $rd)>;
21222111
// Sets all bits in a register.
21232112
def : InstAlias<"ser\t$rd", (LDIRdK LD8 : $rd, 0xff), 0>;
21242113

2125-
let Defs = [SREG] in def BSETs : FS<0, (outs),
2126-
(ins i8imm
2127-
: $s),
2128-
"bset\t$s", []>;
2114+
let hasSideEffects=1 in {
2115+
let Defs = [SREG] in def BSETs : FS<0,
2116+
(outs),
2117+
(ins i8imm:$s),
2118+
"bset\t$s", []>;
21292119

2130-
let Defs = [SREG] in def BCLRs : FS<1, (outs),
2131-
(ins i8imm
2132-
: $s),
2133-
"bclr\t$s", []>;
2120+
let Defs = [SREG] in def BCLRs : FS<1,
2121+
(outs),
2122+
(ins i8imm:$s),
2123+
"bclr\t$s", []>;
2124+
}
21342125

21352126
// Set/clear aliases for the carry (C) status flag (bit 0).
21362127
def : InstAlias<"sec", (BSETs 0)>;

0 commit comments

Comments
 (0)