Index: lib/Target/Mips/CMakeLists.txt =================================================================== --- lib/Target/Mips/CMakeLists.txt +++ lib/Target/Mips/CMakeLists.txt @@ -26,6 +26,7 @@ MipsCCState.cpp MipsConstantIslandPass.cpp MipsDelaySlotFiller.cpp + MipsExpandPseudo.cpp MipsFastISel.cpp MipsHazardSchedule.cpp MipsInstrInfo.cpp Index: lib/Target/Mips/Mips.h =================================================================== --- lib/Target/Mips/Mips.h +++ lib/Target/Mips/Mips.h @@ -32,6 +32,7 @@ FunctionPass *createMipsHazardSchedule(); FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM); FunctionPass *createMipsConstantIslandPass(); + FunctionPass *createMipsExpandPseudoPass(); } // end namespace llvm; #endif Index: lib/Target/Mips/Mips64InstrInfo.td =================================================================== --- lib/Target/Mips/Mips64InstrInfo.td +++ lib/Target/Mips/Mips64InstrInfo.td @@ -73,6 +73,9 @@ def ATOMIC_LOAD_XOR_I64 : Atomic2Ops; def ATOMIC_LOAD_NAND_I64 : Atomic2Ops; def ATOMIC_SWAP_I64 : Atomic2Ops; +} + +let isPseudo = 1 in { def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap; } Index: lib/Target/Mips/MipsExpandPseudo.cpp =================================================================== --- /dev/null +++ lib/Target/Mips/MipsExpandPseudo.cpp @@ -0,0 +1,346 @@ +//===-- MipsExpandPseudo.cpp - Expand pseudo instructions -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that expands pseudo instructions into target +// instructions to allow proper scheduling, if-conversion, and other late +// optimizations. This pass should be run after register allocation but before +// the post-regalloc scheduling pass. 
+// +//===----------------------------------------------------------------------===// + +#include "Mips.h" +#include "MipsInstrInfo.h" +#include "MipsSubtarget.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "mips-pseudo" + +namespace { + class MipsExpandPseudo : public MachineFunctionPass { + public: + static char ID; + MipsExpandPseudo() : MachineFunctionPass(ID) {} + + const MipsInstrInfo *TII; + const MipsSubtarget *STI; + + bool runOnMachineFunction(MachineFunction &Fn) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + + StringRef getPassName() const override { + return "Mips pseudo instruction expansion pass"; + } + + private: + bool expandAtomicCmpSwap(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandAtomicCmpSwapSubword(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NMBB); + bool expandMBB(MachineBasicBlock &MBB); + }; + char MipsExpandPseudo::ID = 0; +} + +static void addPostLoopLiveIns(MachineBasicBlock *MBB, LivePhysRegs &LiveRegs) { + for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I) + MBB->addLiveIn(*I); +} + +bool MipsExpandPseudo::expandAtomicCmpSwapSubword( + MachineBasicBlock &BB, MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI) { + + MachineFunction *MF = BB.getParent(); + + const bool ArePtrs64bit = STI->getABI().ArePtrs64bit(); + DebugLoc DL = 
I->getDebugLoc(); + unsigned LL, SC; + + unsigned ZERO = Mips::ZERO; + unsigned BNE = Mips::BNE; + unsigned BEQ = Mips::BEQ; + unsigned SEOp = + I->getOpcode() == Mips::ATOMIC_CMP_SWAP_I8_FRAG ? Mips::SEB : Mips::SEH; + + if (STI->inMicroMipsMode()) { + LL = Mips::LL_MM; + SC = Mips::SC_MM; + } else { + LL = STI->hasMips32r6() ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6) + : (ArePtrs64bit ? Mips::LL64 : Mips::LL); + SC = STI->hasMips32r6() ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6) + : (ArePtrs64bit ? Mips::SC64 : Mips::SC); + } + + unsigned Dest = I->getOperand(0).getReg(); + unsigned Ptr = I->getOperand(1).getReg(); + unsigned Mask = I->getOperand(2).getReg(); + unsigned ShiftCmpVal = I->getOperand(3).getReg(); + unsigned Mask2 = I->getOperand(4).getReg(); + unsigned ShiftNewVal = I->getOperand(5).getReg(); + unsigned ShiftAmnt = I->getOperand(6).getReg(); + + LivePhysRegs LiveRegs(&TII->getRegisterInfo()); +// LiveRegs.addLiveOuts(BB); + for (auto MBBI = std::prev(BB.end()); MBBI != I; --MBBI) + LiveRegs.stepBackward(*MBBI); + + // insert new blocks after the current block + const BasicBlock *LLVM_BB = BB.getBasicBlock(); + MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineFunction::iterator It = ++BB.getIterator(); + MF->insert(It, loop1MBB); + MF->insert(It, loop2MBB); + MF->insert(It, sinkMBB); + MF->insert(It, exitMBB); + + // Transfer the remainder of BB and its successor edges to exitMBB. + exitMBB->splice(exitMBB->begin(), &BB, + std::next(MachineBasicBlock::iterator(I)), BB.end()); + exitMBB->transferSuccessorsAndUpdatePHIs(&BB); + + // thisMBB: + // ... 
+ // fallthrough --> loop1MBB + BB.addSuccessor(loop1MBB, BranchProbability::getOne()); + loop1MBB->addSuccessor(sinkMBB); + loop1MBB->addSuccessor(loop2MBB); + loop2MBB->addSuccessor(loop1MBB); + loop2MBB->addSuccessor(sinkMBB); + sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne()); + + // loop1MBB: + // ll dest, 0(ptr) + // and Mask', dest, Mask + // bne Mask', ShiftCmpVal, exitMBB + BuildMI(loop1MBB, DL, TII->get(LL), Dest).addReg(Ptr).addImm(0); + BuildMI(loop1MBB, DL, TII->get(Mips::AND), Mask) + .addReg(Dest) + .addReg(Mask); + BuildMI(loop1MBB, DL, TII->get(BNE)) + .addReg(Mask).addReg(ShiftCmpVal).addMBB(sinkMBB); + loop1MBB->addLiveIn(Ptr); + loop1MBB->addLiveIn(Mask); + loop1MBB->addLiveIn(ShiftCmpVal); + + // loop2MBB: + // and dest, dest, mask2 + // or dest, dest, ShiftNewVal + // sc dest, dest, 0(ptr) + // beq dest, $0, loop1MBB + BuildMI(loop2MBB, DL, TII->get(Mips::AND), Dest) + .addReg(Dest, RegState::Kill) + .addReg(Mask2); + BuildMI(loop2MBB, DL, TII->get(Mips::OR), Dest) + .addReg(Dest, RegState::Kill) + .addReg(ShiftNewVal); + BuildMI(loop2MBB, DL, TII->get(SC), Dest) + .addReg(Dest, RegState::Kill) + .addReg(Ptr) + .addImm(0); + BuildMI(loop2MBB, DL, TII->get(BEQ)) + .addReg(Dest, RegState::Kill) + .addReg(ZERO) + .addMBB(loop1MBB); + loop2MBB->addLiveIn(Ptr); + loop2MBB->addLiveIn(Mask2); + loop2MBB->addLiveIn(Dest); + loop2MBB->addLiveIn(ShiftNewVal); + + // sinkMBB: + // srl srlres, Mask', shiftamt + // sign_extend dest,srlres + BuildMI(sinkMBB, DL, TII->get(Mips::SRLV), Dest) + .addReg(Mask) + .addReg(ShiftAmnt); + if (STI->hasMips32r2()) { + BuildMI(sinkMBB, DL, TII->get(SEOp), Dest).addReg(Dest); + } else { + const unsigned ShiftImm = + I->getOpcode() == Mips::ATOMIC_CMP_SWAP_I16_FRAG ? 
16 : 24; + BuildMI(sinkMBB, DL, TII->get(Mips::SLL), Dest) + .addReg(Dest, RegState::Kill) + .addImm(ShiftImm); + BuildMI(sinkMBB, DL, TII->get(Mips::SRA), Dest) + .addReg(Dest, RegState::Kill) + .addImm(ShiftImm); + } + sinkMBB->addLiveIn(Mask); + sinkMBB->addLiveIn(ShiftAmnt); + + addPostLoopLiveIns(exitMBB, LiveRegs); + exitMBB->addLiveIn(Dest); + + NMBBI = BB.end(); + I->eraseFromParent(); + return true; +} + +bool MipsExpandPseudo::expandAtomicCmpSwap(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI) { + + const unsigned Size = I->getOpcode() == Mips::ATOMIC_CMP_SWAP_I32 ? 4 : 8; + MachineFunction *MF = BB.getParent(); + + const bool ArePtrs64bit = STI->getABI().ArePtrs64bit(); + DebugLoc DL = I->getDebugLoc(); + + LivePhysRegs LiveRegs(&TII->getRegisterInfo()); + LiveRegs.addLiveOuts(BB); + for (auto MBBI = std::prev(BB.end()); MBBI != I; --MBBI) + LiveRegs.stepBackward(*MBBI); + + unsigned LL, SC, ZERO, BNE, BEQ; + + if (Size == 4) { + if (STI->inMicroMipsMode()) { + LL = Mips::LL_MM; + SC = Mips::SC_MM; + } else { + LL = STI->hasMips32r6() + ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6) + : (ArePtrs64bit ? Mips::LL64 : Mips::LL); + SC = STI->hasMips32r6() + ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6) + : (ArePtrs64bit ? Mips::SC64 : Mips::SC); + } + + ZERO = Mips::ZERO; + BNE = Mips::BNE; + BEQ = Mips::BEQ; + } else { + LL = STI->hasMips64r6() ? Mips::LLD_R6 : Mips::LLD; + SC = STI->hasMips64r6() ? 
Mips::SCD_R6 : Mips::SCD; + ZERO = Mips::ZERO_64; + BNE = Mips::BNE64; + BEQ = Mips::BEQ64; + } + + unsigned Dest = I->getOperand(0).getReg(); + unsigned Ptr = I->getOperand(1).getReg(); + unsigned OldVal = I->getOperand(2).getReg(); + unsigned NewVal = I->getOperand(3).getReg(); + + // insert new blocks after the current block + const BasicBlock *LLVM_BB = BB.getBasicBlock(); + MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineFunction::iterator It = ++BB.getIterator(); + MF->insert(It, loop1MBB); + MF->insert(It, loop2MBB); + MF->insert(It, exitMBB); + + // Transfer the remainder of BB and its successor edges to exitMBB. + exitMBB->splice(exitMBB->begin(), &BB, + std::next(MachineBasicBlock::iterator(I)), BB.end()); + exitMBB->transferSuccessorsAndUpdatePHIs(&BB); + + // thisMBB: + // ... + // fallthrough --> loop1MBB + BB.addSuccessor(loop1MBB, BranchProbability::getOne()); + loop1MBB->addSuccessor(exitMBB); + loop1MBB->addSuccessor(loop2MBB); + loop2MBB->addSuccessor(loop1MBB); + loop2MBB->addSuccessor(exitMBB); + + // loop1MBB: + // ll dest, 0(ptr) + // bne dest, oldval, exitMBB + BuildMI(loop1MBB, DL, TII->get(LL), Dest).addReg(Ptr).addImm(0); + BuildMI(loop1MBB, DL, TII->get(BNE)) + .addReg(Dest).addReg(OldVal).addMBB(exitMBB); + loop1MBB->addLiveIn(Ptr); + loop1MBB->addLiveIn(OldVal); + + // loop2MBB: + // sc success, newval, 0(ptr) + // beq success, $0, loop1MBB + BuildMI(loop2MBB, DL, TII->get(SC), NewVal) + .addReg(NewVal).addReg(Ptr).addImm(0); + BuildMI(loop2MBB, DL, TII->get(BEQ)) + .addReg(NewVal, RegState::Kill).addReg(ZERO).addMBB(loop1MBB); + loop2MBB->addLiveIn(Ptr); + loop2MBB->addLiveIn(NewVal); + + addPostLoopLiveIns(exitMBB, LiveRegs); + + NMBBI = BB.end(); + I->eraseFromParent(); + return true; +} + +bool MipsExpandPseudo::expandMI(MachineBasicBlock &MBB, + 
MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NMBB) { + + bool Modified = false; + switch (MBBI->getOpcode()) { + case Mips::ATOMIC_CMP_SWAP_I32: + case Mips::ATOMIC_CMP_SWAP_I64: + return expandAtomicCmpSwap(MBB, MBBI, NMBB); + case Mips::ATOMIC_CMP_SWAP_I8_FRAG: + case Mips::ATOMIC_CMP_SWAP_I16_FRAG: + return expandAtomicCmpSwapSubword(MBB, MBBI, NMBB); + default: + return Modified; + } +} + +bool MipsExpandPseudo::expandMBB(MachineBasicBlock &MBB) { + bool Modified = false; + + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + while (MBBI != E) { + MachineBasicBlock::iterator NMBBI = std::next(MBBI); + Modified |= expandMI(MBB, MBBI, NMBBI); + MBBI = NMBBI; + } + + return Modified; +} + +bool MipsExpandPseudo::runOnMachineFunction(MachineFunction &MF) { + STI = &static_cast(MF.getSubtarget()); + TII = STI->getInstrInfo(); + + bool Modified = false; + for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E; + ++MFI) + Modified |= expandMBB(*MFI); + + return Modified; +} + +/// createMipsExpandPseudoPass - returns an instance of the pseudo instruction +/// expansion pass. 
+FunctionPass *llvm::createMipsExpandPseudoPass() { + return new MipsExpandPseudo(); +} Index: lib/Target/Mips/MipsISelLowering.cpp =================================================================== --- lib/Target/Mips/MipsISelLowering.cpp +++ lib/Target/Mips/MipsISelLowering.cpp @@ -1053,14 +1053,11 @@ case Mips::ATOMIC_SWAP_I64: return emitAtomicBinary(MI, BB, 8, 0); - case Mips::ATOMIC_CMP_SWAP_I8: + case Mips::ATOMIC_CMP_SWAP_I8_PSEUDO: return emitAtomicCmpSwapPartword(MI, BB, 1); - case Mips::ATOMIC_CMP_SWAP_I16: + case Mips::ATOMIC_CMP_SWAP_I16_PSEUDO: return emitAtomicCmpSwapPartword(MI, BB, 2); - case Mips::ATOMIC_CMP_SWAP_I32: - return emitAtomicCmpSwap(MI, BB, 4); - case Mips::ATOMIC_CMP_SWAP_I64: - return emitAtomicCmpSwap(MI, BB, 8); + case Mips::PseudoSDIV: case Mips::PseudoUDIV: case Mips::DIV: @@ -1410,6 +1407,7 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwap(MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const { + llvm_unreachable("Should not have been called!"); assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicCmpSwap."); MachineFunction *MF = BB->getParent(); @@ -1521,18 +1519,15 @@ unsigned Mask = RegInfo.createVirtualRegister(RC); unsigned Mask2 = RegInfo.createVirtualRegister(RC); unsigned ShiftedCmpVal = RegInfo.createVirtualRegister(RC); - unsigned OldVal = RegInfo.createVirtualRegister(RC); - unsigned MaskedOldVal0 = RegInfo.createVirtualRegister(RC); unsigned ShiftedNewVal = RegInfo.createVirtualRegister(RC); unsigned MaskLSB2 = RegInfo.createVirtualRegister(RCp); unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC); unsigned MaskUpper = RegInfo.createVirtualRegister(RC); unsigned MaskedCmpVal = RegInfo.createVirtualRegister(RC); unsigned MaskedNewVal = RegInfo.createVirtualRegister(RC); - unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC); - unsigned StoreVal = RegInfo.createVirtualRegister(RC); - unsigned SrlRes = RegInfo.createVirtualRegister(RC); - unsigned Success = 
RegInfo.createVirtualRegister(RC); + unsigned AtomicOp = MI.getOpcode() == Mips::ATOMIC_CMP_SWAP_I8_PSEUDO + ? Mips::ATOMIC_CMP_SWAP_I8_FRAG + : Mips::ATOMIC_CMP_SWAP_I16_FRAG; unsigned LL, SC; if (isMicroMips) { @@ -1547,14 +1542,8 @@ // insert new blocks after the current block const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineFunction::iterator It = ++BB->getIterator(); - MF->insert(It, loop1MBB); - MF->insert(It, loop2MBB); - MF->insert(It, sinkMBB); MF->insert(It, exitMBB); // Transfer the remainder of BB and its successor edges to exitMBB. @@ -1562,12 +1551,7 @@ std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); - BB->addSuccessor(loop1MBB); - loop1MBB->addSuccessor(sinkMBB); - loop1MBB->addSuccessor(loop2MBB); - loop2MBB->addSuccessor(loop1MBB); - loop2MBB->addSuccessor(sinkMBB); - sinkMBB->addSuccessor(exitMBB); + BB->addSuccessor(exitMBB); // FIXME: computation of newval2 can be moved to loop2MBB. 
// thisMBB: @@ -1612,40 +1596,31 @@ BuildMI(BB, DL, TII->get(Mips::SLLV), ShiftedNewVal) .addReg(MaskedNewVal).addReg(ShiftAmt); - // loop1MBB: - // ll oldval,0(alginedaddr) - // and maskedoldval0,oldval,mask - // bne maskedoldval0,shiftedcmpval,sinkMBB - BB = loop1MBB; - BuildMI(BB, DL, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0); - BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal0) - .addReg(OldVal).addReg(Mask); - BuildMI(BB, DL, TII->get(Mips::BNE)) - .addReg(MaskedOldVal0).addReg(ShiftedCmpVal).addMBB(sinkMBB); - - // loop2MBB: - // and maskedoldval1,oldval,mask2 - // or storeval,maskedoldval1,shiftednewval - // sc success,storeval,0(alignedaddr) - // beq success,$0,loop1MBB - BB = loop2MBB; - BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal1) - .addReg(OldVal).addReg(Mask2); - BuildMI(BB, DL, TII->get(Mips::OR), StoreVal) - .addReg(MaskedOldVal1).addReg(ShiftedNewVal); - BuildMI(BB, DL, TII->get(SC), Success) - .addReg(StoreVal).addReg(AlignedAddr).addImm(0); - BuildMI(BB, DL, TII->get(Mips::BEQ)) - .addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB); - - // sinkMBB: - // srl srlres,maskedoldval0,shiftamt - // sign_extend dest,srlres - BB = sinkMBB; - - BuildMI(BB, DL, TII->get(Mips::SRLV), SrlRes) - .addReg(MaskedOldVal0).addReg(ShiftAmt); - BB = emitSignExtendToI32InReg(MI, BB, Size, Dest, SrlRes); + // For correctness purposes, a new pseudo is introduced here. We need this + // new pseudo, so that FastRegisterAllocator does not see an ll/sc sequence + // that is spread over >1 basic blocks. A register allocator which + // introduces (or any codegen in fact) a store can violate the expectations + // of the hardware. + // + // An atomic read-modify-write sequence starts with a linked load + // instruction and ends with a store conditional instruction. 
The atomic + // read-modify-write sequence fails if any of the following conditions + // occur between the execution of ll and sc: + // * A coherent store is completed by another process or coherent I/O + // module into the block of synchronizable physical memory containing + // the word. The size and alignment of the block is + // implementation-dependent. + // * A coherent store is executed between an LL and SC sequence on the + // same processor to the block of synchronizable physical memory + // containing the word. + // + BuildMI(BB, DL, TII->get(AtomicOp), Dest) + .addReg(AlignedAddr) + .addReg(Mask) + .addReg(ShiftedCmpVal) + .addReg(Mask2) + .addReg(ShiftedNewVal) + .addReg(ShiftAmt); MI.eraseFromParent(); // The instruction is gone now. Index: lib/Target/Mips/MipsInstrInfo.td =================================================================== --- lib/Target/Mips/MipsInstrInfo.td +++ lib/Target/Mips/MipsInstrInfo.td @@ -1666,6 +1666,10 @@ PseudoSE<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$cmp, DRC:$swap), [(set DRC:$dst, (Op iPTR:$ptr, DRC:$cmp, DRC:$swap))]>; +class AtomicCmpSwapSubword : + PseudoSE<(outs RC:$dst), (ins PtrRC:$ptr, RC:$mask, RC:$ShiftCmpVal, + RC:$mask2, RC:$ShiftNewVal, RC:$ShiftAmt), []>; + class LLBase : InstSE<(outs RO:$rt), (ins MO:$addr), !strconcat(opstr, "\t$rt, $addr"), [], II_LL, FrmI, opstr> { @@ -1744,11 +1748,21 @@ def ATOMIC_SWAP_I16 : Atomic2Ops; def ATOMIC_SWAP_I32 : Atomic2Ops; - def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap; - def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap; - def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap; + def ATOMIC_CMP_SWAP_I8_PSEUDO : AtomicCmpSwap; + def ATOMIC_CMP_SWAP_I16_PSEUDO : AtomicCmpSwap; } +let isPseudo = 1 in { + // The expansion of ATOMIC_CMP_SWAP_I(8|16) occurs in two parts. First, + // the *_PSEUDO is partially lowered during ISelLowering to compute the + // aligned addresses and necessary masks, along with another pseudo which + // represents the ll/sc loop. 
That pseudo is lowered after the basic + // postRA pseudos have been lowered. + def ATOMIC_CMP_SWAP_I8_FRAG : AtomicCmpSwapSubword; + def ATOMIC_CMP_SWAP_I16_FRAG : AtomicCmpSwapSubword; + + def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap; +} /// Pseudo instructions for loading and storing accumulator registers. let isPseudo = 1, isCodeGenOnly = 1, hasNoSchedulingInfo = 1 in { def LOAD_ACC64 : Load<"", ACC64>; Index: lib/Target/Mips/MipsTargetMachine.cpp =================================================================== --- lib/Target/Mips/MipsTargetMachine.cpp +++ lib/Target/Mips/MipsTargetMachine.cpp @@ -213,6 +213,7 @@ bool addInstSelector() override; void addPreEmitPass() override; void addPreRegAlloc() override; + void addPreSched2() override; }; } // end anonymous namespace @@ -270,3 +271,7 @@ addPass(createMipsLongBranchPass(TM)); addPass(createMipsConstantIslandPass()); } + +void MipsPassConfig::addPreSched2() { + addPass(createMipsExpandPseudoPass()); +} Index: test/CodeGen/Mips/atomicCmpSwapPW.ll =================================================================== --- test/CodeGen/Mips/atomicCmpSwapPW.ll +++ test/CodeGen/Mips/atomicCmpSwapPW.ll @@ -5,13 +5,21 @@ ; RUN: llc -O0 -march=mips64el -mcpu=mips64r2 -target-abi=n64 < %s -filetype=asm -o - \ ; RUN: | FileCheck -check-prefixes=PTR64,ALL %s + +; ALL-LABEL: foo: ; PTR32: lw $[[R0:[0-9]+]] +; PTR32: addiu $[[R1:[0-9]+]], $zero, -4 +; PTR32: and $[[R2:[0-9]+]], $[[R0]], $[[R1]] + ; PTR64: ld $[[R0:[0-9]+]] +; PTR64: daddiu $[[R1:[0-9]+]], $zero, -4 +; PTR64: and $[[R2:[0-9]+]], $[[R0]], $[[R1]] -; ALL: ll ${{[0-9]+}}, 0($[[R0]]) +; ALL: ll ${{[0-9]+}}, 0($[[R2]]) -define {i16, i1} @foo(i16* %addr, i16 signext %r, i16 zeroext %new) { - %res = cmpxchg i16* %addr, i16 %r, i16 %new seq_cst seq_cst +define {i16, i1} @foo(i16** %addr, i16 signext %r, i16 zeroext %new) { + %ptr = load i16*, i16** %addr + %res = cmpxchg i16* %ptr, i16 %r, i16 %new seq_cst seq_cst ret {i16, i1} %res } Index: 
test/CodeGen/Mips/no-store-in-atomic-rmw.ll =================================================================== --- /dev/null +++ test/CodeGen/Mips/no-store-in-atomic-rmw.ll @@ -0,0 +1,156 @@ +; RUN: llc -O0 -march=mips64 -mcpu=mips64r2 < %s | FileCheck %s + +; Check that no stores occur between ll and sc when the fast register allocator +; is used. Atomic read-modify-write sequences on certain MIPS implementations +; will fail if a store occurs between a ll and sc. + +define i32 @main() { +; CHECK-LABEL: main: +entry: + %retval = alloca i32, align 4 + %I = alloca i32, align 4 + %k = alloca i32, align 4 + %i = alloca i32*, align 8 + %ret = alloca i32, align 4 + %flag_k = alloca i8, align 1 + %.atomictmp = alloca i32, align 4 + %atomic-temp = alloca i32, align 4 + %.atomictmp1 = alloca i32, align 4 + %atomic-temp2 = alloca i32, align 4 + %.atomictmp3 = alloca i32, align 4 + %atomic-temp4 = alloca i32, align 4 + %.atomictmp5 = alloca i32, align 4 + %atomic-temp6 = alloca i32, align 4 + %.atomictmp7 = alloca i32, align 4 + %atomic-temp8 = alloca i32, align 4 + %.atomictmp9 = alloca i32, align 4 + %atomic-temp10 = alloca i32, align 4 + %.atomictmp11 = alloca i32, align 4 + %atomic-temp12 = alloca i32, align 4 + %.atomictmp13 = alloca i32, align 4 + %cmpxchg.bool = alloca i8, align 1 + %cmpxchg.bool14 = alloca i8, align 1 + store i32 0, i32* %retval, align 4 + store i32 0, i32* %I, align 4 + store i32 5, i32* %k, align 4 + store i32* %I, i32** %i, align 8 + store i32 0, i32* %ret, align 4 + store i8 0, i8* %flag_k, align 1 + %0 = load i32*, i32** %i, align 8 + %1 = load i32, i32* %k, align 4 + %2 = atomicrmw xchg i32* %0, i32 %1 monotonic +; CHECK-LABEL: .LBB0_1: +; CHECK: ll +; CHECK-NOT: sd +; CHECK-NOT: sw +; CHECK: sc + store i32 %2, i32* %ret, align 4 + %3 = load i32*, i32** %i, align 8 + store i32 3, i32* %.atomictmp, align 4 + %4 = load i32, i32* %.atomictmp, align 4 + %5 = atomicrmw add i32* %3, i32 %4 monotonic +; CHECK-LABEL: .LBB0_3: +; CHECK: ll +; 
CHECK-NOT: sd +; CHECK-NOT: sw +; CHECK: addu +; CHECK: sc + store i32 %5, i32* %atomic-temp, align 4 + %6 = load i32, i32* %atomic-temp, align 4 + %7 = load i32*, i32** %i, align 8 + store i32 3, i32* %.atomictmp1, align 4 + %8 = load i32, i32* %.atomictmp1, align 4 + %9 = atomicrmw sub i32* %7, i32 %8 monotonic +; CHECK-LABEL: .LBB0_5: +; CHECK: ll +; CHECK-NOT: sd +; CHECK-NOT: sw +; CHECK: subu +; CHECK: sc + store i32 %9, i32* %atomic-temp2, align 4 + %10 = load i32, i32* %atomic-temp2, align 4 + %11 = load i32*, i32** %i, align 8 + store i32 3, i32* %.atomictmp3, align 4 + %12 = load i32, i32* %.atomictmp3, align 4 + %13 = atomicrmw and i32* %11, i32 %12 monotonic +; CHECK-LABEL: .LBB0_7: +; CHECK: ll +; CHECK-NOT: sd +; CHECK-NOT: sw +; CHECK: and +; CHECK: sc + store i32 %13, i32* %atomic-temp4, align 4 + %14 = load i32, i32* %atomic-temp4, align 4 + %15 = load i32*, i32** %i, align 8 + store i32 3, i32* %.atomictmp5, align 4 + %16 = load i32, i32* %.atomictmp5, align 4 + %17 = atomicrmw or i32* %15, i32 %16 monotonic +; CHECK-LABEL: .LBB0_9: +; CHECK: ll +; CHECK-NOT: sd +; CHECK-NOT: sw +; CHECK: or +; CHECK: sc + %18 = load i32*, i32** %i, align 8 + store i32 5, i32* %.atomictmp13, align 4 + %19 = load i32, i32* %I, align 4 + %20 = load i32, i32* %.atomictmp13, align 4 + %21 = cmpxchg weak i32* %18, i32 %19, i32 %20 monotonic monotonic +; CHECK-LABEL: .LBB0_11: +; CHECK: ll +; CHECK-NOT: sd +; CHECK-NOT: sw +; CHECK: sc + %22 = extractvalue { i32, i1 } %21, 0 + %23 = extractvalue { i32, i1 } %21, 1 + br i1 %23, label %cmpxchg.continue, label %cmpxchg.store_expected + +cmpxchg.store_expected: ; preds = %entry + store i32 %22, i32* %I, align 4 + br label %cmpxchg.continue + +cmpxchg.continue: ; preds = %cmpxchg.store_expected, %entry + %frombool = zext i1 %23 to i8 + store i8 %frombool, i8* %cmpxchg.bool, align 1 + %24 = load i8, i8* %cmpxchg.bool, align 1 + %tobool = trunc i8 %24 to i1 + %25 = load i32*, i32** %i, align 8 + %26 = load i32, i32* %I, align 
4 + %27 = load i32, i32* %ret, align 4 + %28 = cmpxchg i32* %25, i32 %26, i32 %27 monotonic monotonic +; CHECK-LABEL: .LBB0_17: +; CHECK: ll +; CHECK-NOT: sd +; CHECK-NOT: sw +; CHECK: sc + %29 = extractvalue { i32, i1 } %28, 0 + %30 = extractvalue { i32, i1 } %28, 1 + br i1 %30, label %cmpxchg.continue16, label %cmpxchg.store_expected15 + +cmpxchg.store_expected15: ; preds = %cmpxchg.continue + store i32 %29, i32* %I, align 4 + br label %cmpxchg.continue16 + +cmpxchg.continue16: ; preds = %cmpxchg.store_expected15, %cmpxchg.continue + %frombool17 = zext i1 %30 to i8 + store i8 %frombool17, i8* %cmpxchg.bool14, align 1 + %31 = load i8, i8* %cmpxchg.bool14, align 1 + %tobool18 = trunc i8 %31 to i1 + %32 = atomicrmw xchg i8* %flag_k, i8 1 monotonic +; CHECK-LABEL: .LBB0_23: +; CHECK: ll +; CHECK-NOT: sd +; CHECK-NOT: sw +; CHECK: sc + %tobool19 = icmp ne i8 %32, 0 + %33 = atomicrmw xchg i8* %flag_k, i8 1 monotonic +; CHECK-LABEL: .LBB0_26: +; CHECK: ll +; CHECK-NOT: sd +; CHECK-NOT: sw +; CHECK: sc + %tobool20 = icmp ne i8 %33, 0 + store atomic i8 0, i8* %flag_k monotonic, align 1 + %34 = load i32, i32* %retval, align 4 + ret i32 %34 +}