Index: llvm/trunk/lib/Target/Mips/CMakeLists.txt
===================================================================
--- llvm/trunk/lib/Target/Mips/CMakeLists.txt
+++ llvm/trunk/lib/Target/Mips/CMakeLists.txt
@@ -26,6 +26,7 @@
   MipsCCState.cpp
   MipsConstantIslandPass.cpp
   MipsDelaySlotFiller.cpp
+  MipsExpandPseudo.cpp
   MipsFastISel.cpp
   MipsHazardSchedule.cpp
   MipsInstrInfo.cpp
Index: llvm/trunk/lib/Target/Mips/Mips.h
===================================================================
--- llvm/trunk/lib/Target/Mips/Mips.h
+++ llvm/trunk/lib/Target/Mips/Mips.h
@@ -32,6 +32,7 @@
   FunctionPass *createMipsHazardSchedule();
   FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM);
   FunctionPass *createMipsConstantIslandPass();
+  FunctionPass *createMipsExpandPseudoPass();
 } // end namespace llvm;
 #endif
Index: llvm/trunk/lib/Target/Mips/Mips64InstrInfo.td
===================================================================
--- llvm/trunk/lib/Target/Mips/Mips64InstrInfo.td
+++ llvm/trunk/lib/Target/Mips/Mips64InstrInfo.td
@@ -73,6 +73,9 @@
   def ATOMIC_LOAD_XOR_I64 : Atomic2Ops<atomic_load_xor_64, GPR64>;
   def ATOMIC_LOAD_NAND_I64 : Atomic2Ops<atomic_load_nand_64, GPR64>;
   def ATOMIC_SWAP_I64 : Atomic2Ops<atomic_swap_64, GPR64>;
+}
+
+let isPseudo = 1 in {
   def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<atomic_cmp_swap_64, GPR64>;
 }
Index: llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp
+++ llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp
@@ -1053,14 +1053,11 @@
   case Mips::ATOMIC_SWAP_I64:
     return emitAtomicBinary(MI, BB, 8, 0);
 
-  case Mips::ATOMIC_CMP_SWAP_I8:
+  case Mips::ATOMIC_CMP_SWAP_I8_PSEUDO:
     return emitAtomicCmpSwapPartword(MI, BB, 1);
-  case Mips::ATOMIC_CMP_SWAP_I16:
+  case Mips::ATOMIC_CMP_SWAP_I16_PSEUDO:
     return emitAtomicCmpSwapPartword(MI, BB, 2);
-  case Mips::ATOMIC_CMP_SWAP_I32:
-    return emitAtomicCmpSwap(MI, BB, 4);
-  case Mips::ATOMIC_CMP_SWAP_I64:
-    return emitAtomicCmpSwap(MI, BB, 8);
+
   case Mips::PseudoSDIV:
   case Mips::PseudoUDIV:
   case Mips::DIV:
@@ -1407,96 +1404,6 @@
   return exitMBB;
 }
 
-MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwap(MachineInstr &MI,
-                                                         MachineBasicBlock *BB,
-                                                         unsigned Size) const {
-  assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicCmpSwap.");
-
-  MachineFunction *MF = BB->getParent();
-  MachineRegisterInfo &RegInfo = MF->getRegInfo();
-  const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8));
-  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
-  const bool ArePtrs64bit = ABI.ArePtrs64bit();
-  DebugLoc DL = MI.getDebugLoc();
-  unsigned LL, SC, ZERO, BNE, BEQ;
-
-  if (Size == 4) {
-    if (isMicroMips) {
-      LL = Mips::LL_MM;
-      SC = Mips::SC_MM;
-    } else {
-      LL = Subtarget.hasMips32r6()
-               ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6)
-               : (ArePtrs64bit ? Mips::LL64 : Mips::LL);
-      SC = Subtarget.hasMips32r6()
-               ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6)
-               : (ArePtrs64bit ? Mips::SC64 : Mips::SC);
-    }
-
-    ZERO = Mips::ZERO;
-    BNE = Mips::BNE;
-    BEQ = Mips::BEQ;
-  } else {
-    LL = Subtarget.hasMips64r6() ? Mips::LLD_R6 : Mips::LLD;
-    SC = Subtarget.hasMips64r6() ? Mips::SCD_R6 : Mips::SCD;
-    ZERO = Mips::ZERO_64;
-    BNE = Mips::BNE64;
-    BEQ = Mips::BEQ64;
-  }
-
-  unsigned Dest = MI.getOperand(0).getReg();
-  unsigned Ptr = MI.getOperand(1).getReg();
-  unsigned OldVal = MI.getOperand(2).getReg();
-  unsigned NewVal = MI.getOperand(3).getReg();
-
-  unsigned Success = RegInfo.createVirtualRegister(RC);
-
-  // insert new blocks after the current block
-  const BasicBlock *LLVM_BB = BB->getBasicBlock();
-  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MachineFunction::iterator It = ++BB->getIterator();
-  MF->insert(It, loop1MBB);
-  MF->insert(It, loop2MBB);
-  MF->insert(It, exitMBB);
-
-  // Transfer the remainder of BB and its successor edges to exitMBB.
-  exitMBB->splice(exitMBB->begin(), BB,
-                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
-  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
-  // thisMBB:
-  //   ...
-  //   fallthrough --> loop1MBB
-  BB->addSuccessor(loop1MBB);
-  loop1MBB->addSuccessor(exitMBB);
-  loop1MBB->addSuccessor(loop2MBB);
-  loop2MBB->addSuccessor(loop1MBB);
-  loop2MBB->addSuccessor(exitMBB);
-
-  // loop1MBB:
-  //   ll dest, 0(ptr)
-  //   bne dest, oldval, exitMBB
-  BB = loop1MBB;
-  BuildMI(BB, DL, TII->get(LL), Dest).addReg(Ptr).addImm(0);
-  BuildMI(BB, DL, TII->get(BNE))
-    .addReg(Dest).addReg(OldVal).addMBB(exitMBB);
-
-  // loop2MBB:
-  //   sc success, newval, 0(ptr)
-  //   beq success, $0, loop1MBB
-  BB = loop2MBB;
-  BuildMI(BB, DL, TII->get(SC), Success)
-    .addReg(NewVal).addReg(Ptr).addImm(0);
-  BuildMI(BB, DL, TII->get(BEQ))
-    .addReg(Success).addReg(ZERO).addMBB(loop1MBB);
-
-  MI.eraseFromParent(); // The instruction is gone now.
-
-  return exitMBB;
-}
-
 MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword(
     MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const {
   assert((Size == 1 || Size == 2) &&
@@ -1521,18 +1428,15 @@
   unsigned Mask = RegInfo.createVirtualRegister(RC);
   unsigned Mask2 = RegInfo.createVirtualRegister(RC);
   unsigned ShiftedCmpVal = RegInfo.createVirtualRegister(RC);
-  unsigned OldVal = RegInfo.createVirtualRegister(RC);
-  unsigned MaskedOldVal0 = RegInfo.createVirtualRegister(RC);
   unsigned ShiftedNewVal = RegInfo.createVirtualRegister(RC);
   unsigned MaskLSB2 = RegInfo.createVirtualRegister(RCp);
   unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC);
   unsigned MaskUpper = RegInfo.createVirtualRegister(RC);
   unsigned MaskedCmpVal = RegInfo.createVirtualRegister(RC);
   unsigned MaskedNewVal = RegInfo.createVirtualRegister(RC);
-  unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC);
-  unsigned StoreVal = RegInfo.createVirtualRegister(RC);
-  unsigned SrlRes = RegInfo.createVirtualRegister(RC);
-  unsigned Success = RegInfo.createVirtualRegister(RC);
+  unsigned AtomicOp = MI.getOpcode() == Mips::ATOMIC_CMP_SWAP_I8_PSEUDO
+                          ? Mips::ATOMIC_CMP_SWAP_I8_FRAG
+                          : Mips::ATOMIC_CMP_SWAP_I16_FRAG;
   unsigned LL, SC;
 
   if (isMicroMips) {
@@ -1547,14 +1451,8 @@
 
   // insert new blocks after the current block
   const BasicBlock *LLVM_BB = BB->getBasicBlock();
-  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
   MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
   MachineFunction::iterator It = ++BB->getIterator();
-  MF->insert(It, loop1MBB);
-  MF->insert(It, loop2MBB);
-  MF->insert(It, sinkMBB);
   MF->insert(It, exitMBB);
 
   // Transfer the remainder of BB and its successor edges to exitMBB.
@@ -1562,12 +1460,7 @@
                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
 
-  BB->addSuccessor(loop1MBB);
-  loop1MBB->addSuccessor(sinkMBB);
-  loop1MBB->addSuccessor(loop2MBB);
-  loop2MBB->addSuccessor(loop1MBB);
-  loop2MBB->addSuccessor(sinkMBB);
-  sinkMBB->addSuccessor(exitMBB);
+  BB->addSuccessor(exitMBB);
 
   // FIXME: computation of newval2 can be moved to loop2MBB.
   // thisMBB:
@@ -1612,40 +1505,31 @@
   BuildMI(BB, DL, TII->get(Mips::SLLV), ShiftedNewVal)
     .addReg(MaskedNewVal).addReg(ShiftAmt);
 
-  // loop1MBB:
-  //   ll      oldval,0(alginedaddr)
-  //   and     maskedoldval0,oldval,mask
-  //   bne     maskedoldval0,shiftedcmpval,sinkMBB
-  BB = loop1MBB;
-  BuildMI(BB, DL, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0);
-  BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal0)
-    .addReg(OldVal).addReg(Mask);
-  BuildMI(BB, DL, TII->get(Mips::BNE))
-    .addReg(MaskedOldVal0).addReg(ShiftedCmpVal).addMBB(sinkMBB);
-
-  // loop2MBB:
-  //   and     maskedoldval1,oldval,mask2
-  //   or      storeval,maskedoldval1,shiftednewval
-  //   sc      success,storeval,0(alignedaddr)
-  //   beq     success,$0,loop1MBB
-  BB = loop2MBB;
-  BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal1)
-    .addReg(OldVal).addReg(Mask2);
-  BuildMI(BB, DL, TII->get(Mips::OR), StoreVal)
-    .addReg(MaskedOldVal1).addReg(ShiftedNewVal);
-  BuildMI(BB, DL, TII->get(SC), Success)
-    .addReg(StoreVal).addReg(AlignedAddr).addImm(0);
-  BuildMI(BB, DL, TII->get(Mips::BEQ))
-    .addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB);
-
-  // sinkMBB:
-  //   srl     srlres,maskedoldval0,shiftamt
-  //   sign_extend dest,srlres
-  BB = sinkMBB;
-
-  BuildMI(BB, DL, TII->get(Mips::SRLV), SrlRes)
-    .addReg(MaskedOldVal0).addReg(ShiftAmt);
-  BB = emitSignExtendToI32InReg(MI, BB, Size, Dest, SrlRes);
+  // For correctness purposes, a new pseudo is introduced here. We need this
+  // new pseudo so that the fast register allocator does not see an ll/sc
+  // sequence that is spread over more than one basic block. A register
+  // allocator (or, in fact, any codegen pass) which introduces a store
+  // between the ll and the sc can violate the expectations of the hardware.
+  //
+  // An atomic read-modify-write sequence starts with a linked load
+  // instruction and ends with a store conditional instruction. The atomic
+  // read-modify-write sequence fails if any of the following conditions
+  // occur between the execution of ll and sc:
+  //   * A coherent store is completed by another process or coherent I/O
+  //     module into the block of synchronizable physical memory containing
+  //     the word. The size and alignment of the block are
+  //     implementation-dependent.
+  //   * A coherent store is executed between an LL and SC sequence on the
+  //     same processor to the block of synchronizable physical memory
+  //     containing the word.
+  //
+  BuildMI(BB, DL, TII->get(AtomicOp), Dest)
+      .addReg(AlignedAddr)
+      .addReg(Mask)
+      .addReg(ShiftedCmpVal)
+      .addReg(Mask2)
+      .addReg(ShiftedNewVal)
+      .addReg(ShiftAmt);
 
   MI.eraseFromParent(); // The instruction is gone now.
Index: llvm/trunk/lib/Target/Mips/MipsInstrInfo.td
===================================================================
--- llvm/trunk/lib/Target/Mips/MipsInstrInfo.td
+++ llvm/trunk/lib/Target/Mips/MipsInstrInfo.td
@@ -1666,6 +1666,10 @@
            PseudoSE<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$cmp, DRC:$swap),
                     [(set DRC:$dst, (Op iPTR:$ptr, DRC:$cmp, DRC:$swap))]>;
 
+class AtomicCmpSwapSubword<RegisterClass RC> :
+  PseudoSE<(outs RC:$dst), (ins PtrRC:$ptr, RC:$mask, RC:$ShiftCmpVal,
+                            RC:$mask2, RC:$ShiftNewVal, RC:$ShiftAmt), []>;
+
 class LLBase<string opstr, RegisterOperand RO, DAGOperand MO> :
   InstSE<(outs RO:$rt), (ins MO:$addr), !strconcat(opstr, "\t$rt, $addr"),
          [], II_LL, FrmI, opstr> {
@@ -1744,11 +1748,21 @@
   def ATOMIC_SWAP_I16 : Atomic2Ops<atomic_swap_16, GPR32>;
   def ATOMIC_SWAP_I32 : Atomic2Ops<atomic_swap_32, GPR32>;
 
-  def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap<atomic_cmp_swap_8, GPR32>;
-  def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<atomic_cmp_swap_16, GPR32>;
-  def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<atomic_cmp_swap_32, GPR32>;
+  def ATOMIC_CMP_SWAP_I8_PSEUDO : AtomicCmpSwap<atomic_cmp_swap_8, GPR32>;
+  def ATOMIC_CMP_SWAP_I16_PSEUDO : AtomicCmpSwap<atomic_cmp_swap_16, GPR32>;
 }
 
+let isPseudo = 1 in {
+  // The expansion of ATOMIC_CMP_SWAP_I(8|16) occurs in two parts. First,
+  // the *_PSEUDO is partially lowered during ISelLowering, which computes
+  // the aligned address and the necessary masks and emits another pseudo
+  // that represents the ll/sc loop. That pseudo is lowered after the basic
+  // postRA pseudos have been lowered.
+  def ATOMIC_CMP_SWAP_I8_FRAG : AtomicCmpSwapSubword<GPR32>;
+  def ATOMIC_CMP_SWAP_I16_FRAG : AtomicCmpSwapSubword<GPR32>;
+
+  def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<atomic_cmp_swap_32, GPR32>;
+}
 
 /// Pseudo instructions for loading and storing accumulator registers.
 let isPseudo = 1, isCodeGenOnly = 1, hasNoSchedulingInfo = 1 in {
   def LOAD_ACC64 : Load<"", ACC64>;
Index: llvm/trunk/lib/Target/Mips/MipsTargetMachine.cpp
===================================================================
--- llvm/trunk/lib/Target/Mips/MipsTargetMachine.cpp
+++ llvm/trunk/lib/Target/Mips/MipsTargetMachine.cpp
@@ -213,6 +213,7 @@
   bool addInstSelector() override;
   void addPreEmitPass() override;
   void addPreRegAlloc() override;
+  void addPreSched2() override;
 };
 
 } // end anonymous namespace
@@ -270,3 +271,7 @@
   addPass(createMipsLongBranchPass(TM));
   addPass(createMipsConstantIslandPass());
 }
+
+void MipsPassConfig::addPreSched2() {
+  addPass(createMipsExpandPseudoPass());
+}
Index: llvm/trunk/test/CodeGen/Mips/atomicCmpSwapPW.ll
===================================================================
--- llvm/trunk/test/CodeGen/Mips/atomicCmpSwapPW.ll
+++ llvm/trunk/test/CodeGen/Mips/atomicCmpSwapPW.ll
@@ -5,13 +5,21 @@
 ; RUN: llc -O0 -march=mips64el -mcpu=mips64r2 -target-abi=n64 < %s -filetype=asm -o - \
 ; RUN:   | FileCheck -check-prefixes=PTR64,ALL %s
 
+
+; ALL-LABEL: foo:
 ; PTR32: lw $[[R0:[0-9]+]]
+; PTR32: addiu $[[R1:[0-9]+]], $zero, -4
+; PTR32: and $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+
 ; PTR64: ld $[[R0:[0-9]+]]
+; PTR64: daddiu $[[R1:[0-9]+]], $zero, -4
+; PTR64: and $[[R2:[0-9]+]], $[[R0]], $[[R1]]
 
-; ALL: ll ${{[0-9]+}}, 0($[[R0]])
+; ALL: ll ${{[0-9]+}}, 0($[[R2]])
 
-define {i16, i1} @foo(i16* %addr, i16 signext %r, i16 zeroext %new) {
-  %res = cmpxchg i16* %addr, i16 %r, i16 %new seq_cst seq_cst
+define {i16, i1} @foo(i16** %addr, i16 signext %r, i16 zeroext %new) {
+  %ptr = load i16*, i16** %addr
+  %res = cmpxchg i16* %ptr, i16 %r, i16 %new seq_cst seq_cst
   ret {i16, i1} %res
 }
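
Note: the patch adds MipsExpandPseudo.cpp to the build and schedules the new pass via addPreSched2(), but the pass's source is not among the hunks shown above. As orientation only, the sketch below shows the usual shape of such a post-RA MachineFunctionPass in LLVM; the class body, the handling of the *_FRAG opcodes, and the omitted ll/sc loop emission are illustrative assumptions, not the actual contents of MipsExpandPseudo.cpp.

// Minimal sketch of a pre-Sched2 pseudo-expansion pass (illustrative only).
#include "Mips.h"
#include "MipsInstrInfo.h"
#include "MipsSubtarget.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"

using namespace llvm;

namespace {

class MipsExpandPseudo : public MachineFunctionPass {
public:
  static char ID;

  MipsExpandPseudo() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    const auto &STI = MF.getSubtarget<MipsSubtarget>();
    const MipsInstrInfo *TII = STI.getInstrInfo();
    bool Modified = false;

    for (MachineBasicBlock &MBB : MF) {
      // Advance the iterator before inspecting MI so a real expansion could
      // replace or erase the pseudo without invalidating the loop.
      for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
        MachineInstr &MI = *I++;
        switch (MI.getOpcode()) {
        case Mips::ATOMIC_CMP_SWAP_I8_FRAG:
        case Mips::ATOMIC_CMP_SWAP_I16_FRAG:
          // Here the pass would emit the ll/and/bne and or/sc/beq loop with
          // BuildMI and TII, splitting MBB into the loop and sink blocks.
          // The actual loop construction is omitted from this sketch.
          (void)TII;
          Modified = true;
          break;
        default:
          break;
        }
      }
    }
    return Modified;
  }
};

char MipsExpandPseudo::ID = 0;

} // end anonymous namespace

FunctionPass *llvm::createMipsExpandPseudoPass() {
  return new MipsExpandPseudo();
}

Expanding the loop only at addPreSched2(), after register allocation, is what provides the property the new comment in emitAtomicCmpSwapPartword relies on: no spill or reload store can be inserted between the ll and the sc.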