diff --git a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp --- a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp @@ -86,9 +86,6 @@ bool expandAtomicBinaryOp(unsigned Opcode, Block &MBB, BlockIt MBBI); - bool expandAtomicArithmeticOp(unsigned MemOpcode, unsigned ArithOpcode, - Block &MBB, BlockIt MBBI); - /// Specific shift implementation. bool expandLSLB7Rd(Block &MBB, BlockIt MBBI); bool expandLSRB7Rd(Block &MBB, BlockIt MBBI); @@ -902,31 +899,6 @@ return expandAtomicBinaryOp(Opcode, MBB, MBBI, [](MachineInstr &MI) {}); } -bool AVRExpandPseudo::expandAtomicArithmeticOp(unsigned Width, - unsigned ArithOpcode, Block &MBB, - BlockIt MBBI) { - return expandAtomic(MBB, MBBI, [&](MachineInstr &MI) { - auto DstReg = MI.getOperand(0).getReg(); - auto PtrOp = MI.getOperand(1); - auto SrcReg = MI.getOperand(2).getReg(); - - unsigned LoadOpcode = (Width == 8) ? AVR::LDRdPtr : AVR::LDWRdPtr; - unsigned StoreOpcode = (Width == 8) ? AVR::STPtrRr : AVR::STWPtrRr; - - // FIXME: this returns the new value (after the operation), not the old - // value as the atomicrmw instruction is supposed to do! - - // Create the load - buildMI(MBB, MBBI, LoadOpcode, DstReg).addReg(PtrOp.getReg()); - - // Create the arithmetic op - buildMI(MBB, MBBI, ArithOpcode, DstReg).addReg(DstReg).addReg(SrcReg); - - // Create the store - buildMI(MBB, MBBI, StoreOpcode).add(PtrOp).addReg(DstReg); - }); -} - Register AVRExpandPseudo::scavengeGPR8(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); RegScavenger RS; @@ -972,56 +944,6 @@ return expandAtomicBinaryOp(AVR::STWPtrRr, MBB, MBBI); } -template <> -bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { - return expandAtomicArithmeticOp(8, AVR::ADDRdRr, MBB, MBBI); -} - -template <> -bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { - return expandAtomicArithmeticOp(16, AVR::ADDWRdRr, MBB, MBBI); -} - -template <> -bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { - return expandAtomicArithmeticOp(8, AVR::SUBRdRr, MBB, MBBI); -} - -template <> -bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { - return expandAtomicArithmeticOp(16, AVR::SUBWRdRr, MBB, MBBI); -} - -template <> -bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { - return expandAtomicArithmeticOp(8, AVR::ANDRdRr, MBB, MBBI); -} - -template <> -bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { - return expandAtomicArithmeticOp(16, AVR::ANDWRdRr, MBB, MBBI); -} - -template <> -bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { - return expandAtomicArithmeticOp(8, AVR::ORRdRr, MBB, MBBI); -} - -template <> -bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { - return expandAtomicArithmeticOp(16, AVR::ORWRdRr, MBB, MBBI); -} - -template <> -bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { - return expandAtomicArithmeticOp(8, AVR::EORRdRr, MBB, MBBI); -} - -template <> -bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { - return expandAtomicArithmeticOp(16, AVR::EORWRdRr, MBB, MBBI); -} - template <> bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { // On AVR, there is only one core and so atomic fences do nothing. @@ -2273,16 +2195,6 @@ EXPAND(AVR::AtomicLoad16); EXPAND(AVR::AtomicStore8); EXPAND(AVR::AtomicStore16); - EXPAND(AVR::AtomicLoadAdd8); - EXPAND(AVR::AtomicLoadAdd16); - EXPAND(AVR::AtomicLoadSub8); - EXPAND(AVR::AtomicLoadSub16); - EXPAND(AVR::AtomicLoadAnd8); - EXPAND(AVR::AtomicLoadAnd16); - EXPAND(AVR::AtomicLoadOr8); - EXPAND(AVR::AtomicLoadOr16); - EXPAND(AVR::AtomicLoadXor8); - EXPAND(AVR::AtomicLoadXor16); EXPAND(AVR::AtomicFence); EXPAND(AVR::STSWKRr); EXPAND(AVR::STWPtrRr); diff --git a/llvm/lib/Target/AVR/AVRISelLowering.h b/llvm/lib/Target/AVR/AVRISelLowering.h --- a/llvm/lib/Target/AVR/AVRISelLowering.h +++ b/llvm/lib/Target/AVR/AVRISelLowering.h @@ -187,6 +187,9 @@ private: MachineBasicBlock *insertShift(MachineInstr &MI, MachineBasicBlock *BB) const; MachineBasicBlock *insertMul(MachineInstr &MI, MachineBasicBlock *BB) const; + MachineBasicBlock *insertAtomicArithmeticOp(MachineInstr &MI, + MachineBasicBlock *BB, + unsigned Opcode, int Width) const; }; } // end namespace llvm diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp --- a/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -1695,6 +1695,60 @@ return BB; } +// Lower atomicrmw operation to disable interrupts, do operation, and restore +// interrupts. This works because all AVR microcontrollers are single core. +MachineBasicBlock *AVRTargetLowering::insertAtomicArithmeticOp( + MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode, int Width) const { + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); + MachineBasicBlock::iterator I(MI); + const Register SCRATCH_REGISTER = AVR::R0; + const unsigned SREG_ADDR = 0x3f; + DebugLoc dl = MI.getDebugLoc(); + + // Example instruction sequence, for an atomic 8-bit add: + // ldi r25, 5 + // in r0, SREG + // cli + // ld r24, X + // add r25, r24 + // st X, r25 + // out SREG, r0 + + const TargetRegisterClass *RC = + (Width == 8) ? &AVR::GPR8RegClass : &AVR::DREGSRegClass; + unsigned LoadOpcode = (Width == 8) ? AVR::LDRdPtr : AVR::LDWRdPtr; + unsigned StoreOpcode = (Width == 8) ? AVR::STPtrRr : AVR::STWPtrRr; + + // Disable interrupts. + BuildMI(*BB, I, dl, TII.get(AVR::INRdA), SCRATCH_REGISTER).addImm(SREG_ADDR); + BuildMI(*BB, I, dl, TII.get(AVR::BCLRs)).addImm(7); + + // Load the original value. + BuildMI(*BB, I, dl, TII.get(LoadOpcode), MI.getOperand(0).getReg()) + .add(MI.getOperand(1)); + + // Do the arithmetic operation. + Register Result = MRI.createVirtualRegister(RC); + BuildMI(*BB, I, dl, TII.get(Opcode), Result) + .addReg(MI.getOperand(0).getReg()) + .add(MI.getOperand(2)); + + // Store the result. + BuildMI(*BB, I, dl, TII.get(StoreOpcode)) + .add(MI.getOperand(1)) + .addReg(Result); + + // Restore interrupts. + BuildMI(*BB, I, dl, TII.get(AVR::OUTARr)) + .addImm(SREG_ADDR) + .addReg(SCRATCH_REGISTER); + + // Remove the pseudo instruction. + MI.eraseFromParent(); + return BB; +} + MachineBasicBlock * AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const { @@ -1717,6 +1771,26 @@ case AVR::MULRdRr: case AVR::MULSRdRr: return insertMul(MI, MBB); + case AVR::AtomicLoadAdd8: + return insertAtomicArithmeticOp(MI, MBB, AVR::ADDRdRr, 8); + case AVR::AtomicLoadAdd16: + return insertAtomicArithmeticOp(MI, MBB, AVR::ADDWRdRr, 16); + case AVR::AtomicLoadSub8: + return insertAtomicArithmeticOp(MI, MBB, AVR::SUBRdRr, 8); + case AVR::AtomicLoadSub16: + return insertAtomicArithmeticOp(MI, MBB, AVR::SUBWRdRr, 16); + case AVR::AtomicLoadAnd8: + return insertAtomicArithmeticOp(MI, MBB, AVR::ANDRdRr, 8); + case AVR::AtomicLoadAnd16: + return insertAtomicArithmeticOp(MI, MBB, AVR::ANDWRdRr, 16); + case AVR::AtomicLoadOr8: + return insertAtomicArithmeticOp(MI, MBB, AVR::ORRdRr, 8); + case AVR::AtomicLoadOr16: + return insertAtomicArithmeticOp(MI, MBB, AVR::ORWRdRr, 16); + case AVR::AtomicLoadXor8: + return insertAtomicArithmeticOp(MI, MBB, AVR::EORRdRr, 8); + case AVR::AtomicLoadXor16: + return insertAtomicArithmeticOp(MI, MBB, AVR::EORWRdRr, 16); } assert((Opc == AVR::Select16 || Opc == AVR::Select8) && diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td --- a/llvm/lib/Target/AVR/AVRInstrInfo.td +++ b/llvm/lib/Target/AVR/AVRInstrInfo.td @@ -1446,27 +1446,14 @@ : $rd, DRC : $rr)]>; -let Constraints = - "@earlyclobber $rd" in class AtomicLoadOp - : Pseudo<(outs DRC - : $rd), - (ins PTRRC - : $rr, DRC - : $operand), - "atomic_op", [(set DRC - : $rd, (Op i16 - : $rr, DRC - : $operand))]>; - -// FIXME: I think 16-bit atomic binary ops need to mark -// r0 as clobbered. +class AtomicLoadOp + : Pseudo<(outs DRC:$rd), + (ins PTRRC:$rr, DRC:$operand), + "atomic_op", [(set DRC:$rd, (Op i16:$rr, DRC:$operand))]>; // Atomic instructions // =================== // -// These are all expanded by AVRExpandPseudoInsts -// // 8-bit operations can use any pointer register because // they are expanded directly into an LD/ST instruction. // @@ -1482,16 +1469,18 @@ class AtomicLoadOp8 : AtomicLoadOp; class AtomicLoadOp16 : AtomicLoadOp; -def AtomicLoadAdd8 : AtomicLoadOp8; -def AtomicLoadAdd16 : AtomicLoadOp16; -def AtomicLoadSub8 : AtomicLoadOp8; -def AtomicLoadSub16 : AtomicLoadOp16; -def AtomicLoadAnd8 : AtomicLoadOp8; -def AtomicLoadAnd16 : AtomicLoadOp16; -def AtomicLoadOr8 : AtomicLoadOp8; -def AtomicLoadOr16 : AtomicLoadOp16; -def AtomicLoadXor8 : AtomicLoadOp8; -def AtomicLoadXor16 : AtomicLoadOp16; +let usesCustomInserter=1 in { + def AtomicLoadAdd8 : AtomicLoadOp8; + def AtomicLoadAdd16 : AtomicLoadOp16; + def AtomicLoadSub8 : AtomicLoadOp8; + def AtomicLoadSub16 : AtomicLoadOp16; + def AtomicLoadAnd8 : AtomicLoadOp8; + def AtomicLoadAnd16 : AtomicLoadOp16; + def AtomicLoadOr8 : AtomicLoadOp8; + def AtomicLoadOr16 : AtomicLoadOp16; + def AtomicLoadXor8 : AtomicLoadOp8; + def AtomicLoadXor16 : AtomicLoadOp16; +} def AtomicFence : Pseudo<(outs), (ins), "atomic_fence", [(atomic_fence timm, timm)]>; @@ -2109,15 +2098,17 @@ // Sets all bits in a register. def : InstAlias<"ser\t$rd", (LDIRdK LD8 : $rd, 0xff), 0>; -let Defs = [SREG] in def BSETs : FS<0, (outs), - (ins i8imm - : $s), - "bset\t$s", []>; +let hasSideEffects=1 in { + let Defs = [SREG] in def BSETs : FS<0, + (outs), + (ins i8imm:$s), + "bset\t$s", []>; -let Defs = [SREG] in def BCLRs : FS<1, (outs), - (ins i8imm - : $s), - "bclr\t$s", []>; + let Defs = [SREG] in def BCLRs : FS<1, + (outs), + (ins i8imm:$s), + "bclr\t$s", []>; +} // Set/clear aliases for the carry (C) status flag (bit 0). def : InstAlias<"sec", (BSETs 0)>; diff --git a/llvm/test/CodeGen/AVR/atomics/load16.ll b/llvm/test/CodeGen/AVR/atomics/load16.ll --- a/llvm/test/CodeGen/AVR/atomics/load16.ll +++ b/llvm/test/CodeGen/AVR/atomics/load16.ll @@ -31,10 +31,10 @@ ; CHECK-NEXT: cli ; CHECK-NEXT: ld [[RR1:r[0-9]+]], [[RD:(X|Y|Z)]] ; CHECK-NEXT: ldd [[RR2:r[0-9]+]], [[RD]]+1 -; CHECK-NEXT: add [[RR1]], [[TMP:r[0-9]+]] -; CHECK-NEXT: adc [[RR2]], [[TMP:r[0-9]+]] -; CHECK-NEXT: st [[RD]], [[RR1]] -; CHECK-NEXT: std [[RD]]+1, [[A:r[0-9]+]] +; CHECK-NEXT: add [[TMP1:r[0-9]+]], [[RR1]] +; CHECK-NEXT: adc [[TMP2:r[0-9]+]], [[RR2]] +; CHECK-NEXT: st [[RD]], [[TMP1]] +; CHECK-NEXT: std [[RD]]+1, [[TMP2]] ; CHECK-NEXT: out 63, r0 define i16 @atomic_load_add16(i16* %foo) { %val = atomicrmw add i16* %foo, i16 13 seq_cst @@ -46,10 +46,11 @@ ; CHECK-NEXT: cli ; CHECK-NEXT: ld [[RR1:r[0-9]+]], [[RD:(X|Y|Z)]] ; CHECK-NEXT: ldd [[RR2:r[0-9]+]], [[RD]]+1 -; CHECK-NEXT: sub [[RR1]], [[TMP:r[0-9]+]] -; CHECK-NEXT: sbc [[RR2]], [[TMP:r[0-9]+]] -; CHECK-NEXT: st [[RD]], [[RR1]] -; CHECK-NEXT: std [[RD]]+1, [[A:r[0-9]+]] +; CHECK-NEXT: movw +; CHECK-NEXT: sub [[TMP1:r[0-9]+]], [[IN1:r[0-9]+]] +; CHECK-NEXT: sbc [[TMP2:r[0-9]+]], [[IN2:r[0-9]+]] +; CHECK-NEXT: st [[RD]], [[TMP1]] +; CHECK-NEXT: std [[RD]]+1, [[TMP2]] ; CHECK-NEXT: out 63, r0 define i16 @atomic_load_sub16(i16* %foo) { %val = atomicrmw sub i16* %foo, i16 13 seq_cst @@ -61,10 +62,10 @@ ; CHECK-NEXT: cli ; CHECK-NEXT: ld [[RR1:r[0-9]+]], [[RD:(X|Y|Z)]] ; CHECK-NEXT: ldd [[RR2:r[0-9]+]], [[RD]]+1 -; CHECK-NEXT: and [[RR1]], [[TMP:r[0-9]+]] -; CHECK-NEXT: and [[RR2]], [[TMP:r[0-9]+]] -; CHECK-NEXT: st [[RD]], [[RR1]] -; CHECK-NEXT: std [[RD]]+1, [[A:r[0-9]+]] +; CHECK-NEXT: and [[TMP1:r[0-9]+]], [[RR1]] +; CHECK-NEXT: and [[TMP2:r[0-9]+]], [[RR2]] +; CHECK-NEXT: st [[RD]], [[TMP1]] +; CHECK-NEXT: std [[RD]]+1, [[TMP2]] ; CHECK-NEXT: out 63, r0 define i16 @atomic_load_and16(i16* %foo) { %val = atomicrmw and i16* %foo, i16 13 seq_cst @@ -76,10 +77,10 @@ ; CHECK-NEXT: cli ; CHECK-NEXT: ld [[RR1:r[0-9]+]], [[RD:(X|Y|Z)]] ; CHECK-NEXT: ldd [[RR2:r[0-9]+]], [[RD]]+1 -; CHECK-NEXT: or [[RR1]], [[TMP:r[0-9]+]] -; CHECK-NEXT: or [[RR2]], [[TMP:r[0-9]+]] -; CHECK-NEXT: st [[RD]], [[RR1]] -; CHECK-NEXT: std [[RD]]+1, [[A:r[0-9]+]] +; CHECK-NEXT: or [[TMP1:r[0-9]+]], [[RR1]] +; CHECK-NEXT: or [[TMP2:r[0-9]+]], [[RR2]] +; CHECK-NEXT: st [[RD]], [[TMP1]] +; CHECK-NEXT: std [[RD]]+1, [[TMP2]] ; CHECK-NEXT: out 63, r0 define i16 @atomic_load_or16(i16* %foo) { %val = atomicrmw or i16* %foo, i16 13 seq_cst @@ -91,10 +92,10 @@ ; CHECK-NEXT: cli ; CHECK-NEXT: ld [[RR1:r[0-9]+]], [[RD:(X|Y|Z)]] ; CHECK-NEXT: ldd [[RR2:r[0-9]+]], [[RD]]+1 -; CHECK-NEXT: eor [[RR1]], [[TMP:r[0-9]+]] -; CHECK-NEXT: eor [[RR2]], [[TMP:r[0-9]+]] -; CHECK-NEXT: st [[RD]], [[RR1]] -; CHECK-NEXT: std [[RD]]+1, [[A:r[0-9]+]] +; CHECK-NEXT: eor [[TMP1:r[0-9]+]], [[RR1]] +; CHECK-NEXT: eor [[TMP2:r[0-9]+]], [[RR2]] +; CHECK-NEXT: st [[RD]], [[TMP1]] +; CHECK-NEXT: std [[RD]]+1, [[TMP2]] ; CHECK-NEXT: out 63, r0 define i16 @atomic_load_xor16(i16* %foo) { %val = atomicrmw xor i16* %foo, i16 13 seq_cst diff --git a/llvm/test/CodeGen/AVR/atomics/load8.ll b/llvm/test/CodeGen/AVR/atomics/load8.ll --- a/llvm/test/CodeGen/AVR/atomics/load8.ll +++ b/llvm/test/CodeGen/AVR/atomics/load8.ll @@ -31,8 +31,8 @@ ; CHECK: in r0, 63 ; CHECK-NEXT: cli ; CHECK-NEXT: ld [[RD:r[0-9]+]], [[RR:(X|Y|Z)]] -; CHECK-NEXT: add [[RD]], [[RR1:r[0-9]+]] -; CHECK-NEXT: st [[RR]], [[RD]] +; CHECK-NEXT: add [[RR1:r[0-9]+]], [[RD]] +; CHECK-NEXT: st [[RR]], [[RR1]] ; CHECK-NEXT: out 63, r0 define i8 @atomic_load_add8(i8* %foo) { %val = atomicrmw add i8* %foo, i8 13 seq_cst @@ -43,8 +43,9 @@ ; CHECK: in r0, 63 ; CHECK-NEXT: cli ; CHECK-NEXT: ld [[RD:r[0-9]+]], [[RR:(X|Y|Z)]] -; CHECK-NEXT: sub [[RD]], [[RR1:r[0-9]+]] -; CHECK-NEXT: st [[RR]], [[RD]] +; CHECK-NEXT: mov [[RR1:r[0-9]+]], [[RD]] +; CHECK-NEXT: sub [[RR1]], [[RR2:r[0-9]+]] +; CHECK-NEXT: st [[RR]], [[RR1]] ; CHECK-NEXT: out 63, r0 define i8 @atomic_load_sub8(i8* %foo) { %val = atomicrmw sub i8* %foo, i8 13 seq_cst @@ -55,8 +56,8 @@ ; CHECK: in r0, 63 ; CHECK-NEXT: cli ; CHECK-NEXT: ld [[RD:r[0-9]+]], [[RR:(X|Y|Z)]] -; CHECK-NEXT: and [[RD]], [[RR1:r[0-9]+]] -; CHECK-NEXT: st [[RR]], [[RD]] +; CHECK-NEXT: and [[RR1:r[0-9]+]], [[RD]] +; CHECK-NEXT: st [[RR]], [[RR1]] ; CHECK-NEXT: out 63, r0 define i8 @atomic_load_and8(i8* %foo) { %val = atomicrmw and i8* %foo, i8 13 seq_cst @@ -67,8 +68,8 @@ ; CHECK: in r0, 63 ; CHECK-NEXT: cli ; CHECK-NEXT: ld [[RD:r[0-9]+]], [[RR:(X|Y|Z)]] -; CHECK-NEXT: or [[RD]], [[RR1:r[0-9]+]] -; CHECK-NEXT: st [[RR]], [[RD]] +; CHECK-NEXT: or [[RR1:r[0-9]+]], [[RD]] +; CHECK-NEXT: st [[RR]], [[RR1]] ; CHECK-NEXT: out 63, r0 define i8 @atomic_load_or8(i8* %foo) { %val = atomicrmw or i8* %foo, i8 13 seq_cst @@ -79,8 +80,8 @@ ; CHECK: in r0, 63 ; CHECK-NEXT: cli ; CHECK-NEXT: ld [[RD:r[0-9]+]], [[RR:(X|Y|Z)]] -; CHECK-NEXT: eor [[RD]], [[RR1:r[0-9]+]] -; CHECK-NEXT: st [[RR]], [[RD]] +; CHECK-NEXT: eor [[RR1:r[0-9]+]], [[RD]] +; CHECK-NEXT: st [[RR]], [[RR1]] ; CHECK-NEXT: out 63, r0 define i8 @atomic_load_xor8(i8* %foo) { %val = atomicrmw xor i8* %foo, i8 13 seq_cst