Index: lib/Target/Mips/CMakeLists.txt
===================================================================
--- lib/Target/Mips/CMakeLists.txt
+++ lib/Target/Mips/CMakeLists.txt
@@ -26,6 +26,7 @@
   MipsCCState.cpp
   MipsConstantIslandPass.cpp
   MipsDelaySlotFiller.cpp
+  MipsExpandPseudo.cpp
   MipsFastISel.cpp
   MipsHazardSchedule.cpp
   MipsInstrInfo.cpp
Index: lib/Target/Mips/Mips.h
===================================================================
--- lib/Target/Mips/Mips.h
+++ lib/Target/Mips/Mips.h
@@ -33,6 +33,7 @@
   FunctionPass *createMipsLongBranchPass();
   FunctionPass *createMipsConstantIslandPass();
   FunctionPass *createMicroMipsSizeReductionPass();
+  FunctionPass *createMipsExpandPseudoPass();
 } // end namespace llvm;

 #endif
Index: lib/Target/Mips/Mips64InstrInfo.td
===================================================================
--- lib/Target/Mips/Mips64InstrInfo.td
+++ lib/Target/Mips/Mips64InstrInfo.td
@@ -76,6 +76,17 @@
   def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<atomic_cmp_swap_64, GPR64>;
 }

+def ATOMIC_LOAD_ADD_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
+def ATOMIC_LOAD_SUB_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
+def ATOMIC_LOAD_AND_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
+def ATOMIC_LOAD_OR_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
+def ATOMIC_LOAD_XOR_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
+def ATOMIC_LOAD_NAND_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
+
+def ATOMIC_SWAP_I64_POSTRA : Atomic2OpsPostRA<GPR64>;
+
+def ATOMIC_CMP_SWAP_I64_POSTRA : AtomicCmpSwapPostRA<GPR64>;
+
 /// Pseudo instructions for loading and storing accumulator registers.
 let isPseudo = 1, isCodeGenOnly = 1, hasNoSchedulingInfo = 1 in {
   def LOAD_ACC128 : Load<"", ACC128>;
Index: lib/Target/Mips/MipsExpandPseudo.cpp
===================================================================
--- /dev/null
+++ lib/Target/Mips/MipsExpandPseudo.cpp
@@ -0,0 +1,692 @@
+//===-- MipsExpandPseudo.cpp - Expand pseudo instructions ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that expands pseudo instructions into target
+// instructions to allow proper scheduling, if-conversion, and other late
+// optimizations. This pass should be run after register allocation but before
+// the post-regalloc scheduling pass.
+//
+// Currently it is only used for expanding atomic pseudos after register
+// allocation. We do this to avoid the fast register allocator introducing
+// spills between ll and sc. These stores cause some MIPS implementations to
+// abort the atomic RMW sequence.
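+//
+// For example, with this pass a 32-bit atomic add is expanded, after register
+// allocation, into an ll/sc retry loop of roughly the following shape
+// (register names are symbolic):
+//
+//   $loop:
+//     ll    $old, 0($ptr)
+//     addu  $scratch, $old, $incr
+//     sc    $scratch, 0($ptr)
+//     beq   $scratch, $zero, $loop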
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mips.h"
+#include "MipsInstrInfo.h"
+#include "MipsSubtarget.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "mips-pseudo"
+
+namespace {
+  class MipsExpandPseudo : public MachineFunctionPass {
+  public:
+    static char ID;
+    MipsExpandPseudo() : MachineFunctionPass(ID) {}
+
+    const MipsInstrInfo *TII;
+    const MipsSubtarget *STI;
+    const MachineRegisterInfo *MRI;
+
+    bool runOnMachineFunction(MachineFunction &Fn) override;
+
+    MachineFunctionProperties getRequiredProperties() const override {
+      return MachineFunctionProperties().set(
+          MachineFunctionProperties::Property::NoVRegs);
+    }
+
+    StringRef getPassName() const override {
+      return "Mips pseudo instruction expansion pass";
+    }
+
+  private:
+    bool expandAtomicCmpSwap(MachineBasicBlock &MBB,
+                             MachineBasicBlock::iterator MBBI,
+                             MachineBasicBlock::iterator &NextMBBI);
+    bool expandAtomicCmpSwapSubword(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MBBI,
+                                    MachineBasicBlock::iterator &NextMBBI);
+
+    bool expandAtomicBinOp(MachineBasicBlock &BB,
+                           MachineBasicBlock::iterator I,
+                           MachineBasicBlock::iterator &NMBBI, unsigned Size);
+    bool expandAtomicBinOpSubword(MachineBasicBlock &BB,
+                                  MachineBasicBlock::iterator I,
+                                  MachineBasicBlock::iterator &NMBBI);
+
+    bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+                  MachineBasicBlock::iterator &NMBB);
+    bool expandMBB(MachineBasicBlock &MBB);
+  };
+  char MipsExpandPseudo::ID = 0;
+}
+
+bool MipsExpandPseudo::expandAtomicCmpSwapSubword(
+    MachineBasicBlock &BB, MachineBasicBlock::iterator I,
+    MachineBasicBlock::iterator &NMBBI) {
+
+  MachineFunction *MF = BB.getParent();
+
+  const bool ArePtrs64bit = STI->getABI().ArePtrs64bit();
+  DebugLoc DL = I->getDebugLoc();
+  unsigned LL, SC;
+
+  unsigned ZERO = Mips::ZERO;
+  unsigned BNE = Mips::BNE;
+  unsigned BEQ = Mips::BEQ;
+  unsigned SEOp =
+      I->getOpcode() == Mips::ATOMIC_CMP_SWAP_I8_POSTRA ? Mips::SEB : Mips::SEH;
+
+  if (STI->inMicroMipsMode()) {
+    LL = Mips::LL_MM;
+    SC = Mips::SC_MM;
+  } else {
+    LL = STI->hasMips32r6() ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6)
+                            : (ArePtrs64bit ? Mips::LL64 : Mips::LL);
+    SC = STI->hasMips32r6() ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6)
+                            : (ArePtrs64bit ? Mips::SC64 : Mips::SC);
+  }
+
+  unsigned Dest = I->getOperand(0).getReg();
+  unsigned Ptr = I->getOperand(1).getReg();
+  unsigned Mask = I->getOperand(2).getReg();
+  unsigned ShiftCmpVal = I->getOperand(3).getReg();
+  unsigned Mask2 = I->getOperand(4).getReg();
+  unsigned ShiftNewVal = I->getOperand(5).getReg();
+  unsigned ShiftAmnt = I->getOperand(6).getReg();
+  unsigned Scratch = I->getOperand(7).getReg();
+  unsigned Scratch2 = I->getOperand(8).getReg();
+
+  LivePhysRegs LiveRegs(TII->getRegisterInfo());
+
+  // insert new blocks after the current block
+  const BasicBlock *LLVM_BB = BB.getBasicBlock();
+  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineFunction::iterator It = ++BB.getIterator();
+  MF->insert(It, loop1MBB);
+  MF->insert(It, loop2MBB);
+  MF->insert(It, sinkMBB);
+  MF->insert(It, exitMBB);
+
+  // Transfer the remainder of BB and its successor edges to exitMBB.
+  exitMBB->splice(exitMBB->begin(), &BB,
+                  std::next(MachineBasicBlock::iterator(I)), BB.end());
+  exitMBB->transferSuccessorsAndUpdatePHIs(&BB);
+  computeLiveIns(LiveRegs, *MRI, *exitMBB);
+
+  // thisMBB:
+  //   ...
+  //   fallthrough --> loop1MBB
+  BB.addSuccessor(loop1MBB, BranchProbability::getOne());
+  loop1MBB->addSuccessor(sinkMBB);
+  loop1MBB->addSuccessor(loop2MBB);
+  loop2MBB->addSuccessor(loop1MBB);
+  loop2MBB->addSuccessor(sinkMBB);
+  sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne());
+
+  // loop1MBB:
+  //   ll dest, 0(ptr)
+  //   and Mask', dest, Mask
+  //   bne Mask', ShiftCmpVal, sinkMBB
+  BuildMI(loop1MBB, DL, TII->get(LL), Scratch).addReg(Ptr).addImm(0);
+  BuildMI(loop1MBB, DL, TII->get(Mips::AND), Scratch2)
+      .addReg(Scratch)
+      .addReg(Mask);
+  BuildMI(loop1MBB, DL, TII->get(BNE))
+    .addReg(Scratch2).addReg(ShiftCmpVal).addMBB(sinkMBB);
+  computeLiveIns(LiveRegs, *MRI, *loop1MBB);
+
+  // loop2MBB:
+  //   and dest, dest, mask2
+  //   or dest, dest, ShiftNewVal
+  //   sc dest, dest, 0(ptr)
+  //   beq dest, $0, loop1MBB
+  BuildMI(loop2MBB, DL, TII->get(Mips::AND), Scratch)
+      .addReg(Scratch, RegState::Kill)
+      .addReg(Mask2);
+  BuildMI(loop2MBB, DL, TII->get(Mips::OR), Scratch)
+      .addReg(Scratch, RegState::Kill)
+      .addReg(ShiftNewVal);
+  BuildMI(loop2MBB, DL, TII->get(SC), Scratch)
+      .addReg(Scratch, RegState::Kill)
+      .addReg(Ptr)
+      .addImm(0);
+  BuildMI(loop2MBB, DL, TII->get(BEQ))
+      .addReg(Scratch, RegState::Kill)
+      .addReg(ZERO)
+      .addMBB(loop1MBB);
+  computeLiveIns(LiveRegs, *MRI, *loop2MBB);
+
+  // sinkMBB:
+  //   srl srlres, Mask', shiftamt
+  //   sign_extend dest, srlres
+  BuildMI(sinkMBB, DL, TII->get(Mips::SRLV), Dest)
+      .addReg(Scratch2)
+      .addReg(ShiftAmnt);
+  if (STI->hasMips32r2()) {
+    BuildMI(sinkMBB, DL, TII->get(SEOp), Dest).addReg(Dest);
+  } else {
+    const unsigned ShiftImm =
+        I->getOpcode() == Mips::ATOMIC_CMP_SWAP_I16_POSTRA ? 16 : 24;
+    BuildMI(sinkMBB, DL, TII->get(Mips::SLL), Dest)
+        .addReg(Dest, RegState::Kill)
+        .addImm(ShiftImm);
+    BuildMI(sinkMBB, DL, TII->get(Mips::SRA), Dest)
+        .addReg(Dest, RegState::Kill)
+        .addImm(ShiftImm);
+  }
+
+  computeLiveIns(LiveRegs, *MRI, *sinkMBB);
+
+  NMBBI = BB.end();
+  I->eraseFromParent();
+  return true;
+}
+
+bool MipsExpandPseudo::expandAtomicCmpSwap(MachineBasicBlock &BB,
+                                           MachineBasicBlock::iterator I,
+                                           MachineBasicBlock::iterator &NMBBI) {
+
+  const unsigned Size =
+      I->getOpcode() == Mips::ATOMIC_CMP_SWAP_I32_POSTRA ? 4 : 8;
+  MachineFunction *MF = BB.getParent();
+
+  const bool ArePtrs64bit = STI->getABI().ArePtrs64bit();
+  DebugLoc DL = I->getDebugLoc();
+  LivePhysRegs LiveRegs(TII->getRegisterInfo());
+
+  unsigned LL, SC, ZERO, BNE, BEQ, MOVE;
+
+  if (Size == 4) {
+    if (STI->inMicroMipsMode()) {
+      LL = Mips::LL_MM;
+      SC = Mips::SC_MM;
+    } else {
+      LL = STI->hasMips32r6()
+               ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6)
+               : (ArePtrs64bit ? Mips::LL64 : Mips::LL);
+      SC = STI->hasMips32r6()
+               ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6)
+               : (ArePtrs64bit ? Mips::SC64 : Mips::SC);
+    }
+
+    ZERO = Mips::ZERO;
+    BNE = Mips::BNE;
+    BEQ = Mips::BEQ;
+    MOVE = Mips::OR;
+  } else {
+    LL = STI->hasMips64r6() ? Mips::LLD_R6 : Mips::LLD;
+    SC = STI->hasMips64r6() ? Mips::SCD_R6 : Mips::SCD;
+    ZERO = Mips::ZERO_64;
+    BNE = Mips::BNE64;
+    BEQ = Mips::BEQ64;
+    MOVE = Mips::OR64;
+  }
+
+  unsigned Dest = I->getOperand(0).getReg();
+  unsigned Ptr = I->getOperand(1).getReg();
+  unsigned OldVal = I->getOperand(2).getReg();
+  unsigned NewVal = I->getOperand(3).getReg();
+  unsigned Scratch = I->getOperand(4).getReg();
+
+  // insert new blocks after the current block
+  const BasicBlock *LLVM_BB = BB.getBasicBlock();
+  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineFunction::iterator It = ++BB.getIterator();
+  MF->insert(It, loop1MBB);
+  MF->insert(It, loop2MBB);
+  MF->insert(It, exitMBB);
+
+  // Transfer the remainder of BB and its successor edges to exitMBB.
+  exitMBB->splice(exitMBB->begin(), &BB,
+                  std::next(MachineBasicBlock::iterator(I)), BB.end());
+  exitMBB->transferSuccessorsAndUpdatePHIs(&BB);
+  computeLiveIns(LiveRegs, *MRI, *exitMBB);
+
+  // thisMBB:
+  //   ...
+  //   fallthrough --> loop1MBB
+  BB.addSuccessor(loop1MBB, BranchProbability::getOne());
+  loop1MBB->addSuccessor(exitMBB);
+  loop1MBB->addSuccessor(loop2MBB);
+  loop2MBB->addSuccessor(loop1MBB);
+  loop2MBB->addSuccessor(exitMBB);
+
+  // loop1MBB:
+  //   ll dest, 0(ptr)
+  //   bne dest, oldval, exitMBB
+  BuildMI(loop1MBB, DL, TII->get(LL), Dest).addReg(Ptr).addImm(0);
+  BuildMI(loop1MBB, DL, TII->get(BNE))
+    .addReg(Dest).addReg(OldVal).addMBB(exitMBB);
+  computeLiveIns(LiveRegs, *MRI, *loop1MBB);
+
+  // loop2MBB:
+  //   move scratch, NewVal
+  //   sc Scratch, Scratch, 0(ptr)
+  //   beq Scratch, $0, loop1MBB
+  BuildMI(loop2MBB, DL, TII->get(MOVE), Scratch).addReg(NewVal).addReg(ZERO);
+  BuildMI(loop2MBB, DL, TII->get(SC), Scratch)
+    .addReg(Scratch).addReg(Ptr).addImm(0);
+  BuildMI(loop2MBB, DL, TII->get(BEQ))
+    .addReg(Scratch, RegState::Kill).addReg(ZERO).addMBB(loop1MBB);
+  computeLiveIns(LiveRegs, *MRI, *loop2MBB);
+
+  NMBBI = BB.end();
+  I->eraseFromParent();
+  return true;
+}
+
+bool MipsExpandPseudo::expandAtomicBinOpSubword(
+    MachineBasicBlock &BB, MachineBasicBlock::iterator I,
+    MachineBasicBlock::iterator &NMBBI) {
+
+  MachineFunction *MF = BB.getParent();
+
+  const bool ArePtrs64bit = STI->getABI().ArePtrs64bit();
+  DebugLoc DL = I->getDebugLoc();
+  LivePhysRegs LiveRegs(TII->getRegisterInfo());
+
+  unsigned LL, SC, SEOp;
+
+  if (STI->inMicroMipsMode()) {
+    LL = Mips::LL_MM;
+    SC = Mips::SC_MM;
+  } else {
+    LL = STI->hasMips32r6() ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6)
+                            : (ArePtrs64bit ? Mips::LL64 : Mips::LL);
+    SC = STI->hasMips32r6() ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6)
+                            : (ArePtrs64bit ? Mips::SC64 : Mips::SC);
+  }
+
+  SEOp = Mips::SEH;
+
+  bool IsSwap = false;
+  bool IsNand = false;
+
+  unsigned Opcode = 0;
+  switch (I->getOpcode()) {
+  case Mips::ATOMIC_LOAD_NAND_I8_POSTRA:
+    SEOp = Mips::SEB;
+    LLVM_FALLTHROUGH;
+  case Mips::ATOMIC_LOAD_NAND_I16_POSTRA:
+    IsNand = true;
+    break;
+  case Mips::ATOMIC_SWAP_I8_POSTRA:
+    SEOp = Mips::SEB;
+    LLVM_FALLTHROUGH;
+  case Mips::ATOMIC_SWAP_I16_POSTRA:
+    IsSwap = true;
+    break;
+  case Mips::ATOMIC_LOAD_ADD_I8_POSTRA:
+    SEOp = Mips::SEB;
+    LLVM_FALLTHROUGH;
+  case Mips::ATOMIC_LOAD_ADD_I16_POSTRA:
+    Opcode = Mips::ADDu;
+    break;
+  case Mips::ATOMIC_LOAD_SUB_I8_POSTRA:
+    SEOp = Mips::SEB;
+    LLVM_FALLTHROUGH;
+  case Mips::ATOMIC_LOAD_SUB_I16_POSTRA:
+    Opcode = Mips::SUBu;
+    break;
+  case Mips::ATOMIC_LOAD_AND_I8_POSTRA:
+    SEOp = Mips::SEB;
+    LLVM_FALLTHROUGH;
+  case Mips::ATOMIC_LOAD_AND_I16_POSTRA:
+    Opcode = Mips::AND;
+    break;
+  case Mips::ATOMIC_LOAD_OR_I8_POSTRA:
+    SEOp = Mips::SEB;
+    LLVM_FALLTHROUGH;
+  case Mips::ATOMIC_LOAD_OR_I16_POSTRA:
+    Opcode = Mips::OR;
+    break;
+  case Mips::ATOMIC_LOAD_XOR_I8_POSTRA:
+    SEOp = Mips::SEB;
+    LLVM_FALLTHROUGH;
+  case Mips::ATOMIC_LOAD_XOR_I16_POSTRA:
+    Opcode = Mips::XOR;
+    break;
+  default:
+    llvm_unreachable("Unknown subword atomic pseudo for expansion!");
+  }
+
+  unsigned Dest = I->getOperand(0).getReg();
+  unsigned Ptr = I->getOperand(1).getReg();
+  unsigned Incr = I->getOperand(2).getReg();
+  unsigned Mask = I->getOperand(3).getReg();
+  unsigned Mask2 = I->getOperand(4).getReg();
+  unsigned ShiftAmnt = I->getOperand(5).getReg();
+  unsigned OldVal = I->getOperand(6).getReg();
+  unsigned BinOpRes = I->getOperand(7).getReg();
+  unsigned StoreVal = I->getOperand(8).getReg();
+
+  const BasicBlock *LLVM_BB = BB.getBasicBlock();
+  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineFunction::iterator It = ++BB.getIterator();
+  MF->insert(It, loopMBB);
+  MF->insert(It, sinkMBB);
+  MF->insert(It, exitMBB);
+
+  exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end());
+  exitMBB->transferSuccessorsAndUpdatePHIs(&BB);
+  computeLiveIns(LiveRegs, *MRI, *exitMBB);
+
+  BB.addSuccessor(loopMBB, BranchProbability::getOne());
+  loopMBB->addSuccessor(sinkMBB);
+  loopMBB->addSuccessor(loopMBB);
+
+  BuildMI(loopMBB, DL, TII->get(LL), OldVal).addReg(Ptr).addImm(0);
+  if (IsNand) {
+    //  and andres, oldval, incr2
+    //  nor binopres, $0, andres
+    //  and newval, binopres, mask
+    BuildMI(loopMBB, DL, TII->get(Mips::AND), BinOpRes)
+        .addReg(OldVal)
+        .addReg(Incr);
+    BuildMI(loopMBB, DL, TII->get(Mips::NOR), BinOpRes)
+        .addReg(Mips::ZERO)
+        .addReg(BinOpRes);
+    BuildMI(loopMBB, DL, TII->get(Mips::AND), BinOpRes)
+        .addReg(BinOpRes)
+        .addReg(Mask);
+  } else if (!IsSwap) {
+    //  <binop> binopres, oldval, incr2
+    //  and newval, binopres, mask
+    BuildMI(loopMBB, DL, TII->get(Opcode), BinOpRes)
+        .addReg(OldVal)
+        .addReg(Incr);
+    BuildMI(loopMBB, DL, TII->get(Mips::AND), BinOpRes)
+        .addReg(BinOpRes)
+        .addReg(Mask);
+  } else { // atomic.swap
+    //  and newval, incr2, mask
+    BuildMI(loopMBB, DL, TII->get(Mips::AND), BinOpRes)
+        .addReg(Incr)
+        .addReg(Mask);
+  }
+
+  // and StoreVal, OldVal, Mask2
+  // or StoreVal, StoreVal, BinOpRes
+  // StoreVal = sc StoreVal, 0(Ptr)
+  // beq StoreVal, zero, loopMBB
+  BuildMI(loopMBB, DL, TII->get(Mips::AND), StoreVal)
+    .addReg(OldVal).addReg(Mask2);
+  BuildMI(loopMBB, DL, TII->get(Mips::OR), StoreVal)
+    .addReg(StoreVal).addReg(BinOpRes);
+  BuildMI(loopMBB, DL, TII->get(SC), StoreVal)
+    .addReg(StoreVal).addReg(Ptr).addImm(0);
+  BuildMI(loopMBB, DL, TII->get(Mips::BEQ))
+    .addReg(StoreVal).addReg(Mips::ZERO).addMBB(loopMBB);
+  computeLiveIns(LiveRegs, *MRI, *loopMBB);
+
+  // sinkMBB:
+  //   and     maskedoldval1,oldval,mask
+  //   srl     srlres,maskedoldval1,shiftamt
+  //   sign_extend dest,srlres
+
+  sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne());
+
+  BuildMI(sinkMBB, DL, TII->get(Mips::AND), Dest)
+    .addReg(OldVal).addReg(Mask);
+  BuildMI(sinkMBB, DL, TII->get(Mips::SRLV), Dest)
+      .addReg(Dest).addReg(ShiftAmnt);
+
+  if (STI->hasMips32r2()) {
+    BuildMI(sinkMBB, DL, TII->get(SEOp), Dest).addReg(Dest);
+  } else {
+    const unsigned ShiftImm = SEOp == Mips::SEH ? 16 : 24;
+    BuildMI(sinkMBB, DL, TII->get(Mips::SLL), Dest)
+        .addReg(Dest, RegState::Kill)
+        .addImm(ShiftImm);
+    BuildMI(sinkMBB, DL, TII->get(Mips::SRA), Dest)
+        .addReg(Dest, RegState::Kill)
+        .addImm(ShiftImm);
+  }
+
+  computeLiveIns(LiveRegs, *MRI, *sinkMBB);
+
+  NMBBI = BB.end();
+  I->eraseFromParent();
+
+  return true;
+}
+
+bool MipsExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB,
+                                         MachineBasicBlock::iterator I,
+                                         MachineBasicBlock::iterator &NMBBI,
+                                         unsigned Size) {
+  MachineFunction *MF = BB.getParent();
+
+  const bool ArePtrs64bit = STI->getABI().ArePtrs64bit();
+  DebugLoc DL = I->getDebugLoc();
+  LivePhysRegs LiveRegs(TII->getRegisterInfo());
+
+  unsigned LL, SC, ZERO, BEQ;
+
+  if (Size == 4) {
+    if (STI->inMicroMipsMode()) {
+      LL = Mips::LL_MM;
+      SC = Mips::SC_MM;
+    } else {
+      LL = STI->hasMips32r6()
+               ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6)
+               : (ArePtrs64bit ? Mips::LL64 : Mips::LL);
+      SC = STI->hasMips32r6()
+               ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6)
+               : (ArePtrs64bit ? Mips::SC64 : Mips::SC);
+    }
+
+    ZERO = Mips::ZERO;
+    BEQ = Mips::BEQ;
+  } else {
+    LL = STI->hasMips64r6() ? Mips::LLD_R6 : Mips::LLD;
+    SC = STI->hasMips64r6() ? Mips::SCD_R6 : Mips::SCD;
+    ZERO = Mips::ZERO_64;
+    BEQ = Mips::BEQ64;
+  }
+
+  unsigned OldVal = I->getOperand(0).getReg();
+  unsigned Ptr = I->getOperand(1).getReg();
+  unsigned Incr = I->getOperand(2).getReg();
+  unsigned Scratch = I->getOperand(3).getReg();
+
+  unsigned Opcode = 0;
+  unsigned OR = 0;
+  unsigned AND = 0;
+  unsigned NOR = 0;
+  bool IsNand = false;
+  switch (I->getOpcode()) {
+  case Mips::ATOMIC_LOAD_ADD_I32_POSTRA:
+    Opcode = Mips::ADDu;
+    break;
+  case Mips::ATOMIC_LOAD_SUB_I32_POSTRA:
+    Opcode = Mips::SUBu;
+    break;
+  case Mips::ATOMIC_LOAD_AND_I32_POSTRA:
+    Opcode = Mips::AND;
+    break;
+  case Mips::ATOMIC_LOAD_OR_I32_POSTRA:
+    Opcode = Mips::OR;
+    break;
+  case Mips::ATOMIC_LOAD_XOR_I32_POSTRA:
+    Opcode = Mips::XOR;
+    break;
+  case Mips::ATOMIC_LOAD_NAND_I32_POSTRA:
+    IsNand = true;
+    AND = Mips::AND;
+    NOR = Mips::NOR;
+    break;
+  case Mips::ATOMIC_SWAP_I32_POSTRA:
+    OR = Mips::OR;
+    break;
+  case Mips::ATOMIC_LOAD_ADD_I64_POSTRA:
+    Opcode = Mips::DADDu;
+    break;
+  case Mips::ATOMIC_LOAD_SUB_I64_POSTRA:
+    Opcode = Mips::DSUBu;
+    break;
+  case Mips::ATOMIC_LOAD_AND_I64_POSTRA:
+    Opcode = Mips::AND64;
+    break;
+  case Mips::ATOMIC_LOAD_OR_I64_POSTRA:
+    Opcode = Mips::OR64;
+    break;
+  case Mips::ATOMIC_LOAD_XOR_I64_POSTRA:
+    Opcode = Mips::XOR64;
+    break;
+  case Mips::ATOMIC_LOAD_NAND_I64_POSTRA:
+    IsNand = true;
+    AND = Mips::AND64;
+    NOR = Mips::NOR64;
+    break;
+  case Mips::ATOMIC_SWAP_I64_POSTRA:
+    OR = Mips::OR64;
+    break;
+  default:
+    llvm_unreachable("Unknown pseudo atomic!");
+  }
+
+  const BasicBlock *LLVM_BB = BB.getBasicBlock();
+  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineFunction::iterator It = ++BB.getIterator();
+  MF->insert(It, loopMBB);
+  MF->insert(It, exitMBB);
+
+  exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end());
+  exitMBB->transferSuccessorsAndUpdatePHIs(&BB);
+  computeLiveIns(LiveRegs, *MRI, *exitMBB);
+
+  BB.addSuccessor(loopMBB, BranchProbability::getOne());
+  loopMBB->addSuccessor(exitMBB);
+  loopMBB->addSuccessor(loopMBB);
+
+  BuildMI(loopMBB, DL, TII->get(LL), OldVal).addReg(Ptr).addImm(0);
+  if (Opcode) {
+    BuildMI(loopMBB, DL, TII->get(Opcode), Scratch).addReg(OldVal).addReg(Incr);
+  } else if (IsNand) {
+    assert(AND && NOR &&
+           "Unknown nand instruction for atomic pseudo expansion");
+    BuildMI(loopMBB, DL, TII->get(AND), Scratch).addReg(OldVal).addReg(Incr);
+    BuildMI(loopMBB, DL, TII->get(NOR), Scratch).addReg(ZERO).addReg(Scratch);
+  } else {
+    assert(OR && "Unknown instruction for atomic pseudo expansion!");
+    BuildMI(loopMBB, DL, TII->get(OR), Scratch).addReg(Incr).addReg(ZERO);
+  }
+
+  BuildMI(loopMBB, DL, TII->get(SC), Scratch)
+      .addReg(Scratch)
+      .addReg(Ptr)
+      .addImm(0);
+  BuildMI(loopMBB, DL, TII->get(BEQ))
+      .addReg(Scratch)
+      .addReg(ZERO)
+      .addMBB(loopMBB);
+  computeLiveIns(LiveRegs, *MRI, *loopMBB);
+
+  NMBBI = BB.end();
+  I->eraseFromParent();
+
+  return true;
+}
+
+bool MipsExpandPseudo::expandMI(MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator MBBI,
+                                MachineBasicBlock::iterator &NMBB) {
+
+  bool Modified = false;
+
+  switch (MBBI->getOpcode()) {
+  case Mips::ATOMIC_CMP_SWAP_I32_POSTRA:
+  case Mips::ATOMIC_CMP_SWAP_I64_POSTRA:
+    return expandAtomicCmpSwap(MBB, MBBI, NMBB);
+  case Mips::ATOMIC_CMP_SWAP_I8_POSTRA:
+  case Mips::ATOMIC_CMP_SWAP_I16_POSTRA:
+    return expandAtomicCmpSwapSubword(MBB, MBBI, NMBB);
+  case Mips::ATOMIC_SWAP_I8_POSTRA:
+  case Mips::ATOMIC_SWAP_I16_POSTRA:
+  case Mips::ATOMIC_LOAD_NAND_I8_POSTRA:
+  case Mips::ATOMIC_LOAD_NAND_I16_POSTRA:
+  case Mips::ATOMIC_LOAD_ADD_I8_POSTRA:
+  case Mips::ATOMIC_LOAD_ADD_I16_POSTRA:
+  case Mips::ATOMIC_LOAD_SUB_I8_POSTRA:
+  case Mips::ATOMIC_LOAD_SUB_I16_POSTRA:
+  case Mips::ATOMIC_LOAD_AND_I8_POSTRA:
+  case Mips::ATOMIC_LOAD_AND_I16_POSTRA:
+  case Mips::ATOMIC_LOAD_OR_I8_POSTRA:
+  case Mips::ATOMIC_LOAD_OR_I16_POSTRA:
+  case Mips::ATOMIC_LOAD_XOR_I8_POSTRA:
+  case Mips::ATOMIC_LOAD_XOR_I16_POSTRA:
+    return expandAtomicBinOpSubword(MBB, MBBI, NMBB);
+  case Mips::ATOMIC_LOAD_ADD_I32_POSTRA:
+  case Mips::ATOMIC_LOAD_SUB_I32_POSTRA:
+  case Mips::ATOMIC_LOAD_AND_I32_POSTRA:
+  case Mips::ATOMIC_LOAD_OR_I32_POSTRA:
+  case Mips::ATOMIC_LOAD_XOR_I32_POSTRA:
+  case Mips::ATOMIC_LOAD_NAND_I32_POSTRA:
+  case Mips::ATOMIC_SWAP_I32_POSTRA:
+    return expandAtomicBinOp(MBB, MBBI, NMBB, 4);
+  case Mips::ATOMIC_LOAD_ADD_I64_POSTRA:
+  case Mips::ATOMIC_LOAD_SUB_I64_POSTRA:
+  case Mips::ATOMIC_LOAD_AND_I64_POSTRA:
+  case Mips::ATOMIC_LOAD_OR_I64_POSTRA:
+  case Mips::ATOMIC_LOAD_XOR_I64_POSTRA:
+  case Mips::ATOMIC_LOAD_NAND_I64_POSTRA:
+  case Mips::ATOMIC_SWAP_I64_POSTRA:
+    return expandAtomicBinOp(MBB, MBBI, NMBB, 8);
+  default:
+    return Modified;
+  }
+}
+
+bool MipsExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
+  bool Modified = false;
+
+  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+  while (MBBI != E) {
+    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+    Modified |= expandMI(MBB, MBBI, NMBBI);
+    MBBI = NMBBI;
+  }
+
+  return Modified;
+}
+
+bool MipsExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
+  STI = &static_cast<const MipsSubtarget &>(MF.getSubtarget());
+  TII = STI->getInstrInfo();
+  MRI = &MF.getRegInfo();
+
+  bool Modified = false;
+  for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
+       ++MFI)
+    Modified |= expandMBB(*MFI);
+
+  return Modified;
+}
+
+/// createMipsExpandPseudoPass - returns an instance of the pseudo instruction
+/// expansion pass.
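+/// The pass is registered via MipsPassConfig::addPreSched2(), i.e. it runs
+/// after register allocation but before the post-RA scheduler (see
+/// MipsTargetMachine.cpp below).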
+FunctionPass *llvm::createMipsExpandPseudoPass() {
+  return new MipsExpandPseudo();
+}
Index: lib/Target/Mips/MipsISelLowering.h
===================================================================
--- lib/Target/Mips/MipsISelLowering.h
+++ lib/Target/Mips/MipsISelLowering.h
@@ -620,17 +620,13 @@
                                         unsigned Size, unsigned DstReg,
                                         unsigned SrcRec) const;

-    MachineBasicBlock *emitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
-                                        unsigned Size, unsigned BinOpcode,
-                                        bool Nand = false) const;
+    MachineBasicBlock *emitAtomicBinary(MachineInstr &MI,
+                                        MachineBasicBlock *BB) const;
     MachineBasicBlock *emitAtomicBinaryPartword(MachineInstr &MI,
                                                 MachineBasicBlock *BB,
-                                                unsigned Size,
-                                                unsigned BinOpcode,
-                                                bool Nand = false) const;
+                                                unsigned Size) const;
     MachineBasicBlock *emitAtomicCmpSwap(MachineInstr &MI,
-                                         MachineBasicBlock *BB,
-                                         unsigned Size) const;
+                                         MachineBasicBlock *BB) const;
     MachineBasicBlock *emitAtomicCmpSwapPartword(MachineInstr &MI,
                                                  MachineBasicBlock *BB,
                                                  unsigned Size) const;
Index: lib/Target/Mips/MipsISelLowering.cpp
===================================================================
--- lib/Target/Mips/MipsISelLowering.cpp
+++ lib/Target/Mips/MipsISelLowering.cpp
@@ -1124,76 +1124,76 @@
   default:
     llvm_unreachable("Unexpected instr type to insert");
   case Mips::ATOMIC_LOAD_ADD_I8:
-    return emitAtomicBinaryPartword(MI, BB, 1, Mips::ADDu);
+    return emitAtomicBinaryPartword(MI, BB, 1);
   case Mips::ATOMIC_LOAD_ADD_I16:
-    return emitAtomicBinaryPartword(MI, BB, 2, Mips::ADDu);
+    return emitAtomicBinaryPartword(MI, BB, 2);
   case Mips::ATOMIC_LOAD_ADD_I32:
-    return emitAtomicBinary(MI, BB, 4, Mips::ADDu);
+    return emitAtomicBinary(MI, BB);
   case Mips::ATOMIC_LOAD_ADD_I64:
-    return emitAtomicBinary(MI, BB, 8, Mips::DADDu);
+    return emitAtomicBinary(MI, BB);
   case Mips::ATOMIC_LOAD_AND_I8:
-    return emitAtomicBinaryPartword(MI, BB, 1, Mips::AND);
+    return emitAtomicBinaryPartword(MI, BB, 1);
   case Mips::ATOMIC_LOAD_AND_I16:
-    return emitAtomicBinaryPartword(MI, BB, 2, Mips::AND);
+    return emitAtomicBinaryPartword(MI, BB, 2);
   case Mips::ATOMIC_LOAD_AND_I32:
-    return emitAtomicBinary(MI, BB, 4, Mips::AND);
+    return emitAtomicBinary(MI, BB);
   case Mips::ATOMIC_LOAD_AND_I64:
-    return emitAtomicBinary(MI, BB, 8, Mips::AND64);
+    return emitAtomicBinary(MI, BB);
   case Mips::ATOMIC_LOAD_OR_I8:
-    return emitAtomicBinaryPartword(MI, BB, 1, Mips::OR);
+    return emitAtomicBinaryPartword(MI, BB, 1);
   case Mips::ATOMIC_LOAD_OR_I16:
-    return emitAtomicBinaryPartword(MI, BB, 2, Mips::OR);
+    return emitAtomicBinaryPartword(MI, BB, 2);
   case Mips::ATOMIC_LOAD_OR_I32:
-    return emitAtomicBinary(MI, BB, 4, Mips::OR);
+    return emitAtomicBinary(MI, BB);
   case Mips::ATOMIC_LOAD_OR_I64:
-    return emitAtomicBinary(MI, BB, 8, Mips::OR64);
+    return emitAtomicBinary(MI, BB);
   case Mips::ATOMIC_LOAD_XOR_I8:
-    return emitAtomicBinaryPartword(MI, BB, 1, Mips::XOR);
+    return emitAtomicBinaryPartword(MI, BB, 1);
   case Mips::ATOMIC_LOAD_XOR_I16:
-    return emitAtomicBinaryPartword(MI, BB, 2, Mips::XOR);
+    return emitAtomicBinaryPartword(MI, BB, 2);
   case Mips::ATOMIC_LOAD_XOR_I32:
-    return emitAtomicBinary(MI, BB, 4, Mips::XOR);
+    return emitAtomicBinary(MI, BB);
   case Mips::ATOMIC_LOAD_XOR_I64:
-    return emitAtomicBinary(MI, BB, 8, Mips::XOR64);
+    return emitAtomicBinary(MI, BB);
   case Mips::ATOMIC_LOAD_NAND_I8:
-    return emitAtomicBinaryPartword(MI, BB, 1, 0, true);
+    return emitAtomicBinaryPartword(MI, BB, 1);
   case Mips::ATOMIC_LOAD_NAND_I16:
-    return emitAtomicBinaryPartword(MI, BB, 2, 0, true);
+    return emitAtomicBinaryPartword(MI, BB, 2);
   case Mips::ATOMIC_LOAD_NAND_I32:
-    return emitAtomicBinary(MI, BB, 4, 0, true);
+    return emitAtomicBinary(MI, BB);
   case Mips::ATOMIC_LOAD_NAND_I64:
-    return emitAtomicBinary(MI, BB, 8, 0, true);
+    return emitAtomicBinary(MI, BB);
   case Mips::ATOMIC_LOAD_SUB_I8:
-    return emitAtomicBinaryPartword(MI, BB, 1, Mips::SUBu);
+    return emitAtomicBinaryPartword(MI, BB, 1);
   case Mips::ATOMIC_LOAD_SUB_I16:
-    return emitAtomicBinaryPartword(MI, BB, 2, Mips::SUBu);
+    return emitAtomicBinaryPartword(MI, BB, 2);
   case Mips::ATOMIC_LOAD_SUB_I32:
-    return emitAtomicBinary(MI, BB, 4, Mips::SUBu);
+    return emitAtomicBinary(MI, BB);
   case Mips::ATOMIC_LOAD_SUB_I64:
-    return emitAtomicBinary(MI, BB, 8, Mips::DSUBu);
+    return emitAtomicBinary(MI, BB);
   case Mips::ATOMIC_SWAP_I8:
-    return emitAtomicBinaryPartword(MI, BB, 1, 0);
+    return emitAtomicBinaryPartword(MI, BB, 1);
   case Mips::ATOMIC_SWAP_I16:
-    return emitAtomicBinaryPartword(MI, BB, 2, 0);
+    return emitAtomicBinaryPartword(MI, BB, 2);
   case Mips::ATOMIC_SWAP_I32:
-    return emitAtomicBinary(MI, BB, 4, 0);
+    return emitAtomicBinary(MI, BB);
   case Mips::ATOMIC_SWAP_I64:
-    return emitAtomicBinary(MI, BB, 8, 0);
+    return emitAtomicBinary(MI, BB);
   case Mips::ATOMIC_CMP_SWAP_I8:
     return emitAtomicCmpSwapPartword(MI, BB, 1);
   case Mips::ATOMIC_CMP_SWAP_I16:
     return emitAtomicCmpSwapPartword(MI, BB, 2);
   case Mips::ATOMIC_CMP_SWAP_I32:
-    return emitAtomicCmpSwap(MI, BB, 4);
+    return emitAtomicCmpSwap(MI, BB);
   case Mips::ATOMIC_CMP_SWAP_I64:
-    return emitAtomicCmpSwap(MI, BB, 8);
+    return emitAtomicCmpSwap(MI, BB);
   case Mips::PseudoSDIV:
   case Mips::PseudoUDIV:
   case Mips::DIV:
@@ -1250,99 +1250,114 @@
 // This function also handles Mips::ATOMIC_SWAP_I32 (when BinOpcode == 0), and
 // Mips::ATOMIC_LOAD_NAND_I32 (when Nand == true)
-MachineBasicBlock *MipsTargetLowering::emitAtomicBinary(MachineInstr &MI,
-                                                        MachineBasicBlock *BB,
-                                                        unsigned Size,
-                                                        unsigned BinOpcode,
-                                                        bool Nand) const {
-  assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicBinary.");
+MachineBasicBlock *
+MipsTargetLowering::emitAtomicBinary(MachineInstr &MI,
+                                     MachineBasicBlock *BB) const {

   MachineFunction *MF = BB->getParent();
   MachineRegisterInfo &RegInfo = MF->getRegInfo();
-  const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8));
   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
-  const bool ArePtrs64bit = ABI.ArePtrs64bit();
   DebugLoc DL = MI.getDebugLoc();

-  unsigned LL, SC, AND, NOR, ZERO, BEQ;
-  if (Size == 4) {
-    if (isMicroMips) {
-      LL = Mips::LL_MM;
-      SC = Mips::SC_MM;
-    } else {
-      LL = Subtarget.hasMips32r6()
-               ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6)
-               : (ArePtrs64bit ? Mips::LL64 : Mips::LL);
-      SC = Subtarget.hasMips32r6()
-               ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6)
-               : (ArePtrs64bit ? Mips::SC64 : Mips::SC);
-    }
-
-    AND = Mips::AND;
-    NOR = Mips::NOR;
-    ZERO = Mips::ZERO;
-    BEQ = Mips::BEQ;
-  } else {
-    LL = Subtarget.hasMips64r6() ? Mips::LLD_R6 : Mips::LLD;
-    SC = Subtarget.hasMips64r6() ? Mips::SCD_R6 : Mips::SCD;
-    AND = Mips::AND64;
-    NOR = Mips::NOR64;
-    ZERO = Mips::ZERO_64;
-    BEQ = Mips::BEQ64;
+  unsigned AtomicOp;
+  switch (MI.getOpcode()) {
+  case Mips::ATOMIC_LOAD_ADD_I32:
+    AtomicOp = Mips::ATOMIC_LOAD_ADD_I32_POSTRA;
+    break;
+  case Mips::ATOMIC_LOAD_SUB_I32:
+    AtomicOp = Mips::ATOMIC_LOAD_SUB_I32_POSTRA;
+    break;
+  case Mips::ATOMIC_LOAD_AND_I32:
+    AtomicOp = Mips::ATOMIC_LOAD_AND_I32_POSTRA;
+    break;
+  case Mips::ATOMIC_LOAD_OR_I32:
+    AtomicOp = Mips::ATOMIC_LOAD_OR_I32_POSTRA;
+    break;
+  case Mips::ATOMIC_LOAD_XOR_I32:
+    AtomicOp = Mips::ATOMIC_LOAD_XOR_I32_POSTRA;
+    break;
+  case Mips::ATOMIC_LOAD_NAND_I32:
+    AtomicOp = Mips::ATOMIC_LOAD_NAND_I32_POSTRA;
+    break;
+  case Mips::ATOMIC_SWAP_I32:
+    AtomicOp = Mips::ATOMIC_SWAP_I32_POSTRA;
+    break;
+  case Mips::ATOMIC_LOAD_ADD_I64:
+    AtomicOp = Mips::ATOMIC_LOAD_ADD_I64_POSTRA;
+    break;
+  case Mips::ATOMIC_LOAD_SUB_I64:
+    AtomicOp = Mips::ATOMIC_LOAD_SUB_I64_POSTRA;
+    break;
+  case Mips::ATOMIC_LOAD_AND_I64:
+    AtomicOp = Mips::ATOMIC_LOAD_AND_I64_POSTRA;
+    break;
+  case Mips::ATOMIC_LOAD_OR_I64:
+    AtomicOp = Mips::ATOMIC_LOAD_OR_I64_POSTRA;
+    break;
+  case Mips::ATOMIC_LOAD_XOR_I64:
+    AtomicOp = Mips::ATOMIC_LOAD_XOR_I64_POSTRA;
+    break;
+  case Mips::ATOMIC_LOAD_NAND_I64:
+    AtomicOp = Mips::ATOMIC_LOAD_NAND_I64_POSTRA;
+    break;
+  case Mips::ATOMIC_SWAP_I64:
+    AtomicOp = Mips::ATOMIC_SWAP_I64_POSTRA;
+    break;
+  default:
+    llvm_unreachable("Unknown pseudo atomic for replacement!");
   }

   unsigned OldVal = MI.getOperand(0).getReg();
   unsigned Ptr = MI.getOperand(1).getReg();
   unsigned Incr = MI.getOperand(2).getReg();
+  unsigned Scratch = RegInfo.createVirtualRegister(RegInfo.getRegClass(OldVal));

-  unsigned StoreVal = RegInfo.createVirtualRegister(RC);
-  unsigned AndRes = RegInfo.createVirtualRegister(RC);
-  unsigned Success = RegInfo.createVirtualRegister(RC);
-
-  // insert new blocks after the current block
-  const BasicBlock *LLVM_BB = BB->getBasicBlock();
-  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MachineFunction::iterator It = ++BB->getIterator();
-  MF->insert(It, loopMBB);
-  MF->insert(It, exitMBB);
-
-  // Transfer the remainder of BB and its successor edges to exitMBB.
-  exitMBB->splice(exitMBB->begin(), BB,
-                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
-  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
-  // thisMBB:
-  //   ...
-  //   fallthrough --> loopMBB
-  BB->addSuccessor(loopMBB);
-  loopMBB->addSuccessor(loopMBB);
-  loopMBB->addSuccessor(exitMBB);
-
-  // loopMBB:
-  //   ll oldval, 0(ptr)
-  //   <binop> storeval, oldval, incr
-  //   sc success, storeval, 0(ptr)
-  //   beq success, $0, loopMBB
-  BB = loopMBB;
-  BuildMI(BB, DL, TII->get(LL), OldVal).addReg(Ptr).addImm(0);
-  if (Nand) {
-    //  and andres, oldval, incr
-    //  nor storeval, $0, andres
-    BuildMI(BB, DL, TII->get(AND), AndRes).addReg(OldVal).addReg(Incr);
-    BuildMI(BB, DL, TII->get(NOR), StoreVal).addReg(ZERO).addReg(AndRes);
-  } else if (BinOpcode) {
-    //  <binop> storeval, oldval, incr
-    BuildMI(BB, DL, TII->get(BinOpcode), StoreVal).addReg(OldVal).addReg(Incr);
-  } else {
-    StoreVal = Incr;
-  }
-  BuildMI(BB, DL, TII->get(SC), Success).addReg(StoreVal).addReg(Ptr).addImm(0);
-  BuildMI(BB, DL, TII->get(BEQ)).addReg(Success).addReg(ZERO).addMBB(loopMBB);
+  MachineBasicBlock::iterator II(MI);

-  MI.eraseFromParent(); // The instruction is gone now.
+  // The scratch register here, with the EarlyClobber | Define | Dead |
+  // Implicit flags, is used to persuade the register allocator and the
+  // machine verifier to accept the usage of this register. This has to be a
+  // real register which has an UNDEF value but is dead after the
+  // instruction, and which is unique among the registers chosen for the
+  // instruction.
+
+  // The EarlyClobber flag has the semantic properties that the operand it is
+  // attached to is clobbered before the rest of the inputs are read. Hence it
+  // must be unique among the operands to the instruction.
+  // The Define flag is needed to convince the machine verifier that an Undef
+  // value isn't a problem.
+  // The Dead flag is needed as the value in scratch isn't used by any other
+  // instruction.
+  // The Implicit flag is here due to the interaction between the other flags
+  // and the machine verifier.
+
+  // For correctness purposes, a new pseudo is introduced here. We need this
+  // new pseudo, so that the FastRegisterAllocator does not see an ll/sc
+  // sequence that is spread over more than one basic block. A register
+  // allocator (or, in fact, any codegen pass) which introduces a store can
+  // violate the expectations of the hardware.
+  //
+  // An atomic read-modify-write sequence starts with a linked load
+  // instruction and ends with a store conditional instruction. The atomic
+  // read-modify-write sequence fails if any of the following conditions
+  // occur between the execution of ll and sc:
+  //   * A coherent store is completed by another processor or coherent I/O
+  //     module into the block of synchronizable physical memory containing
+  //     the word. The size and alignment of the block is
+  //     implementation-dependent.
+  //   * A memory operation such as a load, store or prefetch is executed
+  //     between the ll and sc. The memory operation may cause a cache
+  //     eviction that results in the sc failing.
+
+  BuildMI(*BB, II, DL, TII->get(AtomicOp))
+      .addReg(OldVal, RegState::EarlyClobber | RegState::Define)
+      .addReg(Ptr)
+      .addReg(Incr)
+      .addReg(Scratch, RegState::Define | RegState::Dead |
+                           RegState::EarlyClobber | RegState::Implicit);
+
+  MI.eraseFromParent();

-  return exitMBB;
+  return BB;
 }

 MachineBasicBlock *MipsTargetLowering::emitSignExtendToI32InReg(
@@ -1376,72 +1391,83 @@
 }

 MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword(
-    MachineInstr &MI, MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode,
-    bool Nand) const {
+    MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const {
   assert((Size == 1 || Size == 2) &&
          "Unsupported size for EmitAtomicBinaryPartial.");

   MachineFunction *MF = BB->getParent();
   MachineRegisterInfo &RegInfo = MF->getRegInfo();
-  const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
   const bool ArePtrs64bit = ABI.ArePtrs64bit();
-  const TargetRegisterClass *RCp =
-    getRegClassFor(ArePtrs64bit ? MVT::i64 : MVT::i32);
   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
   DebugLoc DL = MI.getDebugLoc();
+  MachineBasicBlock::iterator II(MI);

   unsigned Dest = MI.getOperand(0).getReg();
   unsigned Ptr = MI.getOperand(1).getReg();
   unsigned Incr = MI.getOperand(2).getReg();

+  const TargetRegisterClass *RC = RegInfo.getRegClass(Incr);
+  const TargetRegisterClass *RCp = RegInfo.getRegClass(Ptr);
+
   unsigned AlignedAddr = RegInfo.createVirtualRegister(RCp);
   unsigned ShiftAmt = RegInfo.createVirtualRegister(RC);
   unsigned Mask = RegInfo.createVirtualRegister(RC);
   unsigned Mask2 = RegInfo.createVirtualRegister(RC);
-  unsigned NewVal = RegInfo.createVirtualRegister(RC);
-  unsigned OldVal = RegInfo.createVirtualRegister(RC);
   unsigned Incr2 = RegInfo.createVirtualRegister(RC);
   unsigned MaskLSB2 = RegInfo.createVirtualRegister(RCp);
   unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC);
   unsigned MaskUpper = RegInfo.createVirtualRegister(RC);
-  unsigned AndRes = RegInfo.createVirtualRegister(RC);
-  unsigned BinOpRes = RegInfo.createVirtualRegister(RC);
-  unsigned MaskedOldVal0 = RegInfo.createVirtualRegister(RC);
-  unsigned StoreVal = RegInfo.createVirtualRegister(RC);
-  unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC);
-  unsigned SrlRes = RegInfo.createVirtualRegister(RC);
-  unsigned Success = RegInfo.createVirtualRegister(RC);
-
-  unsigned LL, SC;
-  if (isMicroMips) {
-    LL = Mips::LL_MM;
-    SC = Mips::SC_MM;
-  } else {
-    LL = Subtarget.hasMips32r6() ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6)
-                                 : (ArePtrs64bit ? Mips::LL64 : Mips::LL);
-    SC = Subtarget.hasMips32r6() ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6)
-                                 : (ArePtrs64bit ? Mips::SC64 : Mips::SC);
-  }
+  unsigned Scratch = RegInfo.createVirtualRegister(RC);
+  unsigned Scratch2 = RegInfo.createVirtualRegister(RC);
+  unsigned Scratch3 = RegInfo.createVirtualRegister(RC);

-  // insert new blocks after the current block
-  const BasicBlock *LLVM_BB = BB->getBasicBlock();
-  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MachineFunction::iterator It = ++BB->getIterator();
-  MF->insert(It, loopMBB);
-  MF->insert(It, sinkMBB);
-  MF->insert(It, exitMBB);
-
-  // Transfer the remainder of BB and its successor edges to exitMBB.
-  exitMBB->splice(exitMBB->begin(), BB,
-                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
-  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
-  BB->addSuccessor(loopMBB);
-  loopMBB->addSuccessor(loopMBB);
-  loopMBB->addSuccessor(sinkMBB);
-  sinkMBB->addSuccessor(exitMBB);
+  unsigned AtomicOp = 0;
+  switch (MI.getOpcode()) {
+  case Mips::ATOMIC_LOAD_NAND_I8:
+    AtomicOp = Mips::ATOMIC_LOAD_NAND_I8_POSTRA;
+    break;
+  case Mips::ATOMIC_LOAD_NAND_I16:
+    AtomicOp = Mips::ATOMIC_LOAD_NAND_I16_POSTRA;
+    break;
+  case Mips::ATOMIC_SWAP_I8:
+    AtomicOp = Mips::ATOMIC_SWAP_I8_POSTRA;
+    break;
+  case Mips::ATOMIC_SWAP_I16:
+    AtomicOp = Mips::ATOMIC_SWAP_I16_POSTRA;
+    break;
+  case Mips::ATOMIC_LOAD_ADD_I8:
+    AtomicOp = Mips::ATOMIC_LOAD_ADD_I8_POSTRA;
+    break;
+  case Mips::ATOMIC_LOAD_ADD_I16:
+    AtomicOp = Mips::ATOMIC_LOAD_ADD_I16_POSTRA;
+    break;
+  case Mips::ATOMIC_LOAD_SUB_I8:
+    AtomicOp = Mips::ATOMIC_LOAD_SUB_I8_POSTRA;
+    break;
+  case Mips::ATOMIC_LOAD_SUB_I16:
+    AtomicOp = Mips::ATOMIC_LOAD_SUB_I16_POSTRA;
+    break;
+  case Mips::ATOMIC_LOAD_AND_I8:
+    AtomicOp = Mips::ATOMIC_LOAD_AND_I8_POSTRA;
+    break;
+  case Mips::ATOMIC_LOAD_AND_I16:
+    AtomicOp = Mips::ATOMIC_LOAD_AND_I16_POSTRA;
+    break;
+  case Mips::ATOMIC_LOAD_OR_I8:
+    AtomicOp = Mips::ATOMIC_LOAD_OR_I8_POSTRA;
+    break;
+  case Mips::ATOMIC_LOAD_OR_I16:
+    AtomicOp = Mips::ATOMIC_LOAD_OR_I16_POSTRA;
+    break;
+  case Mips::ATOMIC_LOAD_XOR_I8:
+    AtomicOp = Mips::ATOMIC_LOAD_XOR_I8_POSTRA;
+    break;
+  case Mips::ATOMIC_LOAD_XOR_I16:
+    AtomicOp = Mips::ATOMIC_LOAD_XOR_I16_POSTRA;
+    break;
+  default:
+    llvm_unreachable("Unknown subword atomic pseudo for expansion!");
+  }
@@ -1454,180 +1480,97 @@
   // thisMBB:
   //    addiu   masklsb2,$0,-4                # 0xfffffffc
   //    and     alignedaddr,ptr,masklsb2
   //    andi    ptrlsb2,ptr,3
   //    sll     shiftamt,ptrlsb2,3
   //    ori     maskupper,$0,255               # 0xff
   //    sll     mask,maskupper,shiftamt
   //    nor     mask2,$0,mask
   //    sll     incr2,incr,shiftamt
   int64_t MaskImm = (Size == 1) ? 255 : 65535;
-  BuildMI(BB, DL, TII->get(ABI.GetPtrAddiuOp()), MaskLSB2)
+  BuildMI(*BB, II, DL, TII->get(ABI.GetPtrAddiuOp()), MaskLSB2)
     .addReg(ABI.GetNullPtr()).addImm(-4);
-  BuildMI(BB, DL, TII->get(ABI.GetPtrAndOp()), AlignedAddr)
+  BuildMI(*BB, II, DL, TII->get(ABI.GetPtrAndOp()), AlignedAddr)
     .addReg(Ptr).addReg(MaskLSB2);
-  BuildMI(BB, DL, TII->get(Mips::ANDi), PtrLSB2)
+  BuildMI(*BB, II, DL, TII->get(Mips::ANDi), PtrLSB2)
       .addReg(Ptr, 0, ArePtrs64bit ? Mips::sub_32 : 0).addImm(3);
   if (Subtarget.isLittle()) {
-    BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3);
+    BuildMI(*BB, II, DL, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3);
   } else {
     unsigned Off = RegInfo.createVirtualRegister(RC);
-    BuildMI(BB, DL, TII->get(Mips::XORi), Off)
+    BuildMI(*BB, II, DL, TII->get(Mips::XORi), Off)
       .addReg(PtrLSB2).addImm((Size == 1) ? 3 : 2);
-    BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(Off).addImm(3);
+    BuildMI(*BB, II, DL, TII->get(Mips::SLL), ShiftAmt).addReg(Off).addImm(3);
   }
-  BuildMI(BB, DL, TII->get(Mips::ORi), MaskUpper)
+  BuildMI(*BB, II, DL, TII->get(Mips::ORi), MaskUpper)
     .addReg(Mips::ZERO).addImm(MaskImm);
-  BuildMI(BB, DL, TII->get(Mips::SLLV), Mask)
+  BuildMI(*BB, II, DL, TII->get(Mips::SLLV), Mask)
     .addReg(MaskUpper).addReg(ShiftAmt);
-  BuildMI(BB, DL, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask);
-  BuildMI(BB, DL, TII->get(Mips::SLLV), Incr2).addReg(Incr).addReg(ShiftAmt);
-
-  // atomic.load.binop
-  // loopMBB:
-  //   ll      oldval,0(alignedaddr)
-  //   binop   binopres,oldval,incr2
-  //   and     newval,binopres,mask
-  //   and     maskedoldval0,oldval,mask2
-  //   or      storeval,maskedoldval0,newval
-  //   sc      success,storeval,0(alignedaddr)
-  //   beq     success,$0,loopMBB
-
-  // atomic.swap
-  // loopMBB:
-  //   ll      oldval,0(alignedaddr)
-  //   and     newval,incr2,mask
-  //   and     maskedoldval0,oldval,mask2
-  //   or      storeval,maskedoldval0,newval
-  //   sc      success,storeval,0(alignedaddr)
-  //   beq     success,$0,loopMBB
-
-  BB = loopMBB;
-  BuildMI(BB, DL, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0);
-  if (Nand) {
-    //  and andres, oldval, incr2
-    //  nor binopres, $0, andres
-    //  and newval, binopres, mask
-    BuildMI(BB, DL, TII->get(Mips::AND), AndRes).addReg(OldVal).addReg(Incr2);
-    BuildMI(BB, DL, TII->get(Mips::NOR), BinOpRes)
-      .addReg(Mips::ZERO).addReg(AndRes);
-    BuildMI(BB, DL, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask);
-  } else if (BinOpcode) {
-    //  <binop> binopres, oldval, incr2
-    //  and newval, binopres, mask
-    BuildMI(BB, DL, TII->get(BinOpcode), BinOpRes).addReg(OldVal).addReg(Incr2);
-    BuildMI(BB, DL, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask);
-  } else { // atomic.swap
-    //  and newval, incr2, mask
-    BuildMI(BB, DL, TII->get(Mips::AND), NewVal).addReg(Incr2).addReg(Mask);
-  }
-
-  BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal0)
-    .addReg(OldVal).addReg(Mask2);
-  BuildMI(BB, DL, TII->get(Mips::OR), StoreVal)
-    .addReg(MaskedOldVal0).addReg(NewVal);
-  BuildMI(BB, DL, TII->get(SC), Success)
-    .addReg(StoreVal).addReg(AlignedAddr).addImm(0);
-  BuildMI(BB, DL, TII->get(Mips::BEQ))
-    .addReg(Success).addReg(Mips::ZERO).addMBB(loopMBB);
-
-  // sinkMBB:
-  //    and     maskedoldval1,oldval,mask
-  //    srl     srlres,maskedoldval1,shiftamt
-  //    sign_extend dest,srlres
-  BB = sinkMBB;
-
-  BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal1)
-    .addReg(OldVal).addReg(Mask);
-  BuildMI(BB, DL, TII->get(Mips::SRLV), SrlRes)
-    .addReg(MaskedOldVal1).addReg(ShiftAmt);
-  BB = emitSignExtendToI32InReg(MI, BB, Size, Dest, SrlRes);
+  BuildMI(*BB, II, DL, TII->get(Mips::NOR), Mask2)
+      .addReg(Mips::ZERO)
+      .addReg(Mask);
+  BuildMI(*BB, II, DL, TII->get(Mips::SLLV), Incr2)
+      .addReg(Incr)
+      .addReg(ShiftAmt);
+
+  // The purpose of the flags on the scratch registers is explained in
+  // emitAtomicBinary. In summary, we need a scratch register which is going
+  // to be undef, and which is unique among the registers chosen for the
+  // instruction.
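+  //
+  // For example, for an i8 atomic add this emits, roughly:
+  //
+  //   dest = ATOMIC_LOAD_ADD_I8_POSTRA alignedaddr, incr2, mask, mask2,
+  //          shiftamt (+ three implicit-def'd scratch registers)
+  //
+  // which MipsExpandPseudo later rewrites into an ll/sc loop on the aligned
+  // word containing the byte.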
+
+  BuildMI(*BB, II, DL, TII->get(AtomicOp), Dest)
+      .addReg(AlignedAddr)
+      .addReg(Incr2)
+      .addReg(Mask)
+      .addReg(Mask2)
+      .addReg(ShiftAmt)
+      .addReg(Scratch, RegState::EarlyClobber | RegState::Define |
+                           RegState::Dead | RegState::Implicit)
+      .addReg(Scratch2, RegState::EarlyClobber | RegState::Define |
+                            RegState::Dead | RegState::Implicit)
+      .addReg(Scratch3, RegState::EarlyClobber | RegState::Define |
+                            RegState::Dead | RegState::Implicit);

   MI.eraseFromParent(); // The instruction is gone now.

-  return exitMBB;
+  return BB;
 }

-MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwap(MachineInstr &MI,
-                                                         MachineBasicBlock *BB,
-                                                         unsigned Size) const {
-  assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicCmpSwap.");
+// Lower atomic compare and swap to a pseudo instruction, taking care to
+// define a scratch register for the pseudo instruction's expansion. The
+// instruction is expanded after the register allocator so as to prevent
+// the insertion of stores between the linked load and the store conditional.
+
+MachineBasicBlock *
+MipsTargetLowering::emitAtomicCmpSwap(MachineInstr &MI,
+                                      MachineBasicBlock *BB) const {
+
+  assert((MI.getOpcode() == Mips::ATOMIC_CMP_SWAP_I32 ||
+          MI.getOpcode() == Mips::ATOMIC_CMP_SWAP_I64) &&
+         "Unsupported atomic pseudo for EmitAtomicCmpSwap.");

   MachineFunction *MF = BB->getParent();
-  MachineRegisterInfo &RegInfo = MF->getRegInfo();
-  const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8));
+  MachineRegisterInfo &MRI = MF->getRegInfo();
   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
-  const bool ArePtrs64bit = ABI.ArePtrs64bit();
   DebugLoc DL = MI.getDebugLoc();

-  unsigned LL, SC, ZERO, BNE, BEQ;
-
-  if (Size == 4) {
-    if (isMicroMips) {
-      LL = Mips::LL_MM;
-      SC = Mips::SC_MM;
-    } else {
-      LL = Subtarget.hasMips32r6()
-               ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6)
-               : (ArePtrs64bit ? Mips::LL64 : Mips::LL);
-      SC = Subtarget.hasMips32r6()
-               ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6)
-               : (ArePtrs64bit ? Mips::SC64 : Mips::SC);
-    }
-
-    ZERO = Mips::ZERO;
-    BNE = Mips::BNE;
-    BEQ = Mips::BEQ;
-  } else {
-    LL = Subtarget.hasMips64r6() ? Mips::LLD_R6 : Mips::LLD;
-    SC = Subtarget.hasMips64r6() ? Mips::SCD_R6 : Mips::SCD;
-    ZERO = Mips::ZERO_64;
-    BNE = Mips::BNE64;
-    BEQ = Mips::BEQ64;
-  }
+  unsigned AtomicOp = MI.getOpcode() == Mips::ATOMIC_CMP_SWAP_I32
+                          ? Mips::ATOMIC_CMP_SWAP_I32_POSTRA
+                          : Mips::ATOMIC_CMP_SWAP_I64_POSTRA;

   unsigned Dest = MI.getOperand(0).getReg();
   unsigned Ptr = MI.getOperand(1).getReg();
   unsigned OldVal = MI.getOperand(2).getReg();
   unsigned NewVal = MI.getOperand(3).getReg();

-  unsigned Success = RegInfo.createVirtualRegister(RC);
-
-  // insert new blocks after the current block
-  const BasicBlock *LLVM_BB = BB->getBasicBlock();
-  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MachineFunction::iterator It = ++BB->getIterator();
-  MF->insert(It, loop1MBB);
-  MF->insert(It, loop2MBB);
-  MF->insert(It, exitMBB);
+  unsigned Scratch = MRI.createVirtualRegister(MRI.getRegClass(OldVal));
+  MachineBasicBlock::iterator II(MI);

-  // Transfer the remainder of BB and its successor edges to exitMBB.
-  exitMBB->splice(exitMBB->begin(), BB,
-                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
-  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+  // The purpose of the flags on the scratch register is explained in
+  // emitAtomicBinary. In summary, we need a scratch register which is going
+  // to be undef, and which is unique among the registers chosen for the
+  // instruction.
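+  //
+  // For example, for an i32 compare-and-swap this emits, roughly:
+  //
+  //   dest = ATOMIC_CMP_SWAP_I32_POSTRA ptr, oldval, newval
+  //          (+ an implicit-def'd scratch register)
+  //
+  // which MipsExpandPseudo later rewrites into the ll/bne/sc retry loop.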
-  // thisMBB:
-  //   ...
-  //   fallthrough --> loop1MBB
-  BB->addSuccessor(loop1MBB);
-  loop1MBB->addSuccessor(exitMBB);
-  loop1MBB->addSuccessor(loop2MBB);
-  loop2MBB->addSuccessor(loop1MBB);
-  loop2MBB->addSuccessor(exitMBB);
-
-  // loop1MBB:
-  //   ll dest, 0(ptr)
-  //   bne dest, oldval, exitMBB
-  BB = loop1MBB;
-  BuildMI(BB, DL, TII->get(LL), Dest).addReg(Ptr).addImm(0);
-  BuildMI(BB, DL, TII->get(BNE))
-    .addReg(Dest).addReg(OldVal).addMBB(exitMBB);
-
-  // loop2MBB:
-  //   sc success, newval, 0(ptr)
-  //   beq success, $0, loop1MBB
-  BB = loop2MBB;
-  BuildMI(BB, DL, TII->get(SC), Success)
-    .addReg(NewVal).addReg(Ptr).addImm(0);
-  BuildMI(BB, DL, TII->get(BEQ))
-    .addReg(Success).addReg(ZERO).addMBB(loop1MBB);
+  BuildMI(*BB, II, DL, TII->get(AtomicOp))
+      .addReg(Dest, RegState::EarlyClobber | RegState::Define)
+      .addReg(Ptr)
+      .addReg(OldVal)
+      .addReg(NewVal)
+      .addReg(Scratch, RegState::EarlyClobber | RegState::Define |
+                           RegState::Dead | RegState::Implicit);

   MI.eraseFromParent(); // The instruction is gone now.

-  return exitMBB;
+  return BB;
 }

 MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword(
@@ -1637,72 +1580,37 @@

   MachineFunction *MF = BB->getParent();
   MachineRegisterInfo &RegInfo = MF->getRegInfo();
-  const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
   const bool ArePtrs64bit = ABI.ArePtrs64bit();
-  const TargetRegisterClass *RCp =
-    getRegClassFor(ArePtrs64bit ? MVT::i64 : MVT::i32);
   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
   DebugLoc DL = MI.getDebugLoc();

-  unsigned Dest = MI.getOperand(0).getReg();
+  unsigned Res = MI.getOperand(0).getReg();
   unsigned Ptr = MI.getOperand(1).getReg();
   unsigned CmpVal = MI.getOperand(2).getReg();
   unsigned NewVal = MI.getOperand(3).getReg();

+  const TargetRegisterClass *RC = RegInfo.getRegClass(CmpVal);
+  const TargetRegisterClass *RCp = RegInfo.getRegClass(Ptr);
+
   unsigned AlignedAddr = RegInfo.createVirtualRegister(RCp);
   unsigned ShiftAmt = RegInfo.createVirtualRegister(RC);
   unsigned Mask = RegInfo.createVirtualRegister(RC);
   unsigned Mask2 = RegInfo.createVirtualRegister(RC);
   unsigned ShiftedCmpVal = RegInfo.createVirtualRegister(RC);
-  unsigned OldVal = RegInfo.createVirtualRegister(RC);
-  unsigned MaskedOldVal0 = RegInfo.createVirtualRegister(RC);
   unsigned ShiftedNewVal = RegInfo.createVirtualRegister(RC);
   unsigned MaskLSB2 = RegInfo.createVirtualRegister(RCp);
   unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC);
   unsigned MaskUpper = RegInfo.createVirtualRegister(RC);
   unsigned MaskedCmpVal = RegInfo.createVirtualRegister(RC);
   unsigned MaskedNewVal = RegInfo.createVirtualRegister(RC);
-  unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC);
-  unsigned StoreVal = RegInfo.createVirtualRegister(RC);
-  unsigned SrlRes = RegInfo.createVirtualRegister(RC);
-  unsigned Success = RegInfo.createVirtualRegister(RC);
-  unsigned LL, SC;
-
-  if (isMicroMips) {
-    LL = Mips::LL_MM;
-    SC = Mips::SC_MM;
-  } else {
-    LL = Subtarget.hasMips32r6() ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6)
-                                 : (ArePtrs64bit ? Mips::LL64 : Mips::LL);
-    SC = Subtarget.hasMips32r6() ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6)
-                                 : (ArePtrs64bit ? Mips::SC64 : Mips::SC);
-  }
+  unsigned AtomicOp = MI.getOpcode() == Mips::ATOMIC_CMP_SWAP_I8
+                          ? Mips::ATOMIC_CMP_SWAP_I8_POSTRA
+                          : Mips::ATOMIC_CMP_SWAP_I16_POSTRA;

-  // insert new blocks after the current block
-  const BasicBlock *LLVM_BB = BB->getBasicBlock();
-  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MachineFunction::iterator It = ++BB->getIterator();
-  MF->insert(It, loop1MBB);
-  MF->insert(It, loop2MBB);
-  MF->insert(It, sinkMBB);
-  MF->insert(It, exitMBB);
-
-  // Transfer the remainder of BB and its successor edges to exitMBB.
-  exitMBB->splice(exitMBB->begin(), BB,
-                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
-  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
-  BB->addSuccessor(loop1MBB);
-  loop1MBB->addSuccessor(sinkMBB);
-  loop1MBB->addSuccessor(loop2MBB);
-  loop2MBB->addSuccessor(loop1MBB);
-  loop2MBB->addSuccessor(sinkMBB);
-  sinkMBB->addSuccessor(exitMBB);
+  unsigned Scratch = RegInfo.createVirtualRegister(RC);
+  unsigned Scratch2 = RegInfo.createVirtualRegister(RC);
+  MachineBasicBlock::iterator II(MI);

-  // FIXME: computation of newval2 can be moved to loop2MBB.
   // thisMBB:
   //    addiu   masklsb2,$0,-4                # 0xfffffffc
   //    and     alignedaddr,ptr,masklsb2
   //    andi    ptrlsb2,ptr,3
   //    sll     shiftamt,ptrlsb2,3
   //    ori     maskupper,$0,255               # 0xff
   //    sll     mask,maskupper,shiftamt
   //    nor     mask2,$0,mask
   //    andi    maskedcmpval,cmpval,255
   //    sll     shiftedcmpval,maskedcmpval,shiftamt
   //    andi    maskednewval,newval,255
   //    sll     shiftednewval,maskednewval,shiftamt
   int64_t MaskImm = (Size == 1) ? 255 : 65535;
-  BuildMI(BB, DL, TII->get(ArePtrs64bit ? Mips::DADDiu : Mips::ADDiu), MaskLSB2)
-    .addReg(ABI.GetNullPtr()).addImm(-4);
-  BuildMI(BB, DL, TII->get(ArePtrs64bit ? Mips::AND64 : Mips::AND), AlignedAddr)
-    .addReg(Ptr).addReg(MaskLSB2);
-  BuildMI(BB, DL, TII->get(Mips::ANDi), PtrLSB2)
-    .addReg(Ptr, 0, ArePtrs64bit ? Mips::sub_32 : 0).addImm(3);
+  BuildMI(*BB, II, DL, TII->get(ArePtrs64bit ? Mips::DADDiu : Mips::ADDiu),
+          MaskLSB2)
+      .addReg(ABI.GetNullPtr())
+      .addImm(-4);
+  BuildMI(*BB, II, DL, TII->get(ArePtrs64bit ? Mips::AND64 : Mips::AND),
+          AlignedAddr)
+      .addReg(Ptr)
+      .addReg(MaskLSB2);
+  BuildMI(*BB, II, DL, TII->get(Mips::ANDi), PtrLSB2)
+      .addReg(Ptr, 0, ArePtrs64bit ? Mips::sub_32 : 0)
+      .addImm(3);
   if (Subtarget.isLittle()) {
-    BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3);
+    BuildMI(*BB, II, DL, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3);
   } else {
     unsigned Off = RegInfo.createVirtualRegister(RC);
-    BuildMI(BB, DL, TII->get(Mips::XORi), Off)
+    BuildMI(*BB, II, DL, TII->get(Mips::XORi), Off)
       .addReg(PtrLSB2).addImm((Size == 1) ? 3 : 2);
-    BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(Off).addImm(3);
+    BuildMI(*BB, II, DL, TII->get(Mips::SLL), ShiftAmt).addReg(Off).addImm(3);
   }
-  BuildMI(BB, DL, TII->get(Mips::ORi), MaskUpper)
+  BuildMI(*BB, II, DL, TII->get(Mips::ORi), MaskUpper)
     .addReg(Mips::ZERO).addImm(MaskImm);
-  BuildMI(BB, DL, TII->get(Mips::SLLV), Mask)
+  BuildMI(*BB, II, DL, TII->get(Mips::SLLV), Mask)
     .addReg(MaskUpper).addReg(ShiftAmt);
-  BuildMI(BB, DL, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask);
-  BuildMI(BB, DL, TII->get(Mips::ANDi), MaskedCmpVal)
+  BuildMI(*BB, II, DL, TII->get(Mips::NOR), Mask2)
+      .addReg(Mips::ZERO)
+      .addReg(Mask);
+  BuildMI(*BB, II, DL, TII->get(Mips::ANDi), MaskedCmpVal)
     .addReg(CmpVal).addImm(MaskImm);
-  BuildMI(BB, DL, TII->get(Mips::SLLV), ShiftedCmpVal)
+  BuildMI(*BB, II, DL, TII->get(Mips::SLLV), ShiftedCmpVal)
    .addReg(MaskedCmpVal).addReg(ShiftAmt);
-  BuildMI(BB, DL, TII->get(Mips::ANDi), MaskedNewVal)
+  BuildMI(*BB, II, DL, TII->get(Mips::ANDi), MaskedNewVal)
     .addReg(NewVal).addImm(MaskImm);
-  BuildMI(BB, DL, TII->get(Mips::SLLV), ShiftedNewVal)
+  BuildMI(*BB, II, DL, TII->get(Mips::SLLV), ShiftedNewVal)
     .addReg(MaskedNewVal).addReg(ShiftAmt);

-  // loop1MBB:
-  //   ll      oldval,0(alginedaddr)
-  //   and     maskedoldval0,oldval,mask
-  //   bne     maskedoldval0,shiftedcmpval,sinkMBB
-  BB = loop1MBB;
-  BuildMI(BB, DL, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0);
-  BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal0)
-    .addReg(OldVal).addReg(Mask);
-  BuildMI(BB, DL, TII->get(Mips::BNE))
-    .addReg(MaskedOldVal0).addReg(ShiftedCmpVal).addMBB(sinkMBB);
-
-  // loop2MBB:
-  //   and     maskedoldval1,oldval,mask2
-  //   or      storeval,maskedoldval1,shiftednewval
-  //   sc      success,storeval,0(alignedaddr)
-  //   beq     success,$0,loop1MBB
-  BB = loop2MBB;
-  BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal1)
-    .addReg(OldVal).addReg(Mask2);
-  BuildMI(BB, DL, TII->get(Mips::OR), StoreVal)
-    .addReg(MaskedOldVal1).addReg(ShiftedNewVal);
-  BuildMI(BB, DL, TII->get(SC), Success)
-      .addReg(StoreVal).addReg(AlignedAddr).addImm(0);
-  BuildMI(BB, DL, TII->get(Mips::BEQ))
-      .addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB);
-
-  // sinkMBB:
-  //   srl     srlres,maskedoldval0,shiftamt
-  //   sign_extend dest,srlres
-  BB = sinkMBB;
-
-  BuildMI(BB, DL, TII->get(Mips::SRLV), SrlRes)
-      .addReg(MaskedOldVal0).addReg(ShiftAmt);
-  BB = emitSignExtendToI32InReg(MI, BB, Size, Dest, SrlRes);
+  // The scratch registers here, with the EarlyClobber | Define | Dead flags,
+  // are used to persuade the register allocator and the machine verifier to
+  // accept the usage of these registers.
+  // The EarlyClobber flag has the semantic properties that the operand it is
+  // attached to is clobbered before the rest of the inputs are read. Hence it
+  // must be unique among the operands to the instruction.
+  // The Define flag is needed to convince the machine verifier that an Undef
+  // value isn't a problem.
+  // The Dead flag is needed as the value in scratch isn't used by any other
+  // instruction.
+  BuildMI(*BB, II, DL, TII->get(AtomicOp), Res)
+      .addReg(AlignedAddr)
+      .addReg(Mask)
+      .addReg(ShiftedCmpVal)
+      .addReg(Mask2)
+      .addReg(ShiftedNewVal)
+      .addReg(ShiftAmt)
+      .addReg(Scratch, RegState::EarlyClobber | RegState::Define |
+                           RegState::Dead | RegState::Implicit)
+      .addReg(Scratch2, RegState::EarlyClobber | RegState::Define |
+                            RegState::Dead | RegState::Implicit);

   MI.eraseFromParent(); // The instruction is gone now.
Index: lib/Target/Mips/MipsInstrInfo.td
===================================================================
--- lib/Target/Mips/MipsInstrInfo.td
+++ lib/Target/Mips/MipsInstrInfo.td
@@ -1670,12 +1670,47 @@
 // Atomic instructions with 2 source operands (ATOMIC_SWAP & ATOMIC_LOAD_*).
 class Atomic2Ops<PatFrag Op, RegisterClass DRC> :
   PseudoSE<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$incr),
-           [(set DRC:$dst, (Op iPTR:$ptr, DRC:$incr))]>;
+           [(set DRC:$dst, (Op iPTR:$ptr, DRC:$incr))]> {
+  let mayLoad = 1;
+  let mayStore = 1;
+}
+
+class Atomic2OpsPostRA<RegisterClass RC> :
+  PseudoSE<(outs RC:$dst), (ins PtrRC:$ptr, RC:$incr), []> {
+  let mayLoad = 1;
+  let mayStore = 1;
+}
+
+class Atomic2OpsSubwordPostRA<RegisterClass RC> :
+  PseudoSE<(outs RC:$dst), (ins PtrRC:$ptr, RC:$incr, RC:$mask, RC:$mask2,
+                            RC:$shiftamnt), []> {
+  let mayLoad = 1;
+  let mayStore = 1;
+}
 
 // Atomic Compare & Swap.
+// Atomic compare and swap is lowered in two stages. The first stage happens
+// during ISelLowering, which produces the PostRA version of this instruction;
+// the PostRA version is then expanded into the actual ll/sc sequence once
+// register allocation is complete.
 class AtomicCmpSwap<PatFrag Op, RegisterClass DRC> :
   PseudoSE<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$cmp, DRC:$swap),
-           [(set DRC:$dst, (Op iPTR:$ptr, DRC:$cmp, DRC:$swap))]>;
+           [(set DRC:$dst, (Op iPTR:$ptr, DRC:$cmp, DRC:$swap))]> {
+  let mayLoad = 1;
+  let mayStore = 1;
+}
+
+class AtomicCmpSwapPostRA<RegisterClass RC> :
+  PseudoSE<(outs RC:$dst), (ins PtrRC:$ptr, RC:$cmp, RC:$swap), []> {
+  let mayLoad = 1;
+  let mayStore = 1;
+}
+
+class AtomicCmpSwapSubwordPostRA<RegisterClass RC> :
+  PseudoSE<(outs RC:$dst), (ins PtrRC:$ptr, RC:$mask, RC:$ShiftCmpVal,
+                            RC:$mask2, RC:$ShiftNewVal, RC:$ShiftAmt), []> {
+  let mayLoad = 1;
+  let mayStore = 1;
+}
+
 class LLBase<string opstr, RegisterOperand RO, DAGOperand MO = mem> :
   InstSE<(outs RO:$rt), (ins MO:$addr), !strconcat(opstr, "\t$rt, $addr"),
@@ -1760,6 +1795,33 @@
   def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<atomic_cmp_swap_32, GPR32>;
 }
 
+def ATOMIC_LOAD_ADD_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_ADD_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_ADD_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
+def ATOMIC_LOAD_SUB_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_SUB_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_SUB_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
+def ATOMIC_LOAD_AND_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_AND_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_AND_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
+def ATOMIC_LOAD_OR_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_OR_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_OR_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
+def ATOMIC_LOAD_XOR_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_XOR_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_XOR_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
+def ATOMIC_LOAD_NAND_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_NAND_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_LOAD_NAND_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
+
+def ATOMIC_SWAP_I8_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_SWAP_I16_POSTRA : Atomic2OpsSubwordPostRA<GPR32>;
+def ATOMIC_SWAP_I32_POSTRA : Atomic2OpsPostRA<GPR32>;
+
+def ATOMIC_CMP_SWAP_I8_POSTRA : AtomicCmpSwapSubwordPostRA<GPR32>;
+def ATOMIC_CMP_SWAP_I16_POSTRA : AtomicCmpSwapSubwordPostRA<GPR32>;
+def ATOMIC_CMP_SWAP_I32_POSTRA : AtomicCmpSwapPostRA<GPR32>;
+
 /// Pseudo instructions for loading and storing accumulator registers.
 let isPseudo = 1, isCodeGenOnly = 1, hasNoSchedulingInfo = 1 in {
   def LOAD_ACC64 : Load<"", ACC64>;
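For orientation, the first stage amounts to swapping the ISel-time pseudo's opcode for its PostRA twin before re-emitting the instruction. A trimmed, hypothetical sketch of that mapping is below; 'selectPostRAOpcode' is an invented helper name, and the patch's real switch covers every size and operation defined above.

#include "MipsInstrInfo.h"
#include "llvm/Support/ErrorHandling.h"

// Sketch: map an ISel-time atomic pseudo to its post-RA counterpart.
// Illustrative subset only; the real code has one case per pseudo.
static unsigned selectPostRAOpcode(unsigned Opc) {
  switch (Opc) {
  case Mips::ATOMIC_LOAD_ADD_I32:
    return Mips::ATOMIC_LOAD_ADD_I32_POSTRA;
  case Mips::ATOMIC_SWAP_I32:
    return Mips::ATOMIC_SWAP_I32_POSTRA;
  case Mips::ATOMIC_CMP_SWAP_I32:
    return Mips::ATOMIC_CMP_SWAP_I32_POSTRA;
  default:
    llvm_unreachable("Unknown atomic pseudo!");
  }
}

Because the PostRA classes set mayLoad/mayStore but carry no selection pattern, the pseudo is treated as an opaque memory operation until it is expanded, so nothing can be scheduled or spilled into the eventual ll/sc window.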
Index: lib/Target/Mips/MipsTargetMachine.cpp
===================================================================
--- lib/Target/Mips/MipsTargetMachine.cpp
+++ lib/Target/Mips/MipsTargetMachine.cpp
@@ -222,6 +222,7 @@
   bool addInstSelector() override;
   void addPreEmitPass() override;
   void addPreRegAlloc() override;
+  void addPreSched2() override;
 };
 } // end anonymous namespace
 
@@ -264,6 +265,10 @@
   });
 }
 
+void MipsPassConfig::addPreSched2() {
+  addPass(createMipsExpandPseudoPass());
+}
+
 // Implemented by targets that want to run passes immediately before
 // machine code is emitted. return true if -print-machineinstrs should
 // print out the code after the passes.
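addPreSched2 hooks run once register allocation has finished but before the post-RA scheduler, which is exactly the window this expansion needs: registers are final, yet the scheduler still sees the real ll/sc sequence. The factory called here is presumably the usual one-liner in the new file (a sketch under that assumption, not quoted from the patch):

// Assumed boilerplate: ties the pass declared earlier to the pipeline hook.
FunctionPass *llvm::createMipsExpandPseudoPass() {
  return new MipsExpandPseudo();
}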
Index: test/CodeGen/Mips/atomic.ll
===================================================================
--- test/CodeGen/Mips/atomic.ll
+++ test/CodeGen/Mips/atomic.ll
@@ -1,22 +1,29 @@
-; RUN: llc -march=mipsel --disable-machine-licm -mcpu=mips32 -relocation-model=pic < %s | \
+; RUN: llc -march=mipsel --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | \
 ; RUN:   FileCheck %s -check-prefixes=ALL,MIPS32-ANY,NO-SEB-SEH,CHECK-EL,NOT-MICROMIPS
 ; RUN: llc -march=mipsel --disable-machine-licm -mcpu=mips32r2 -relocation-model=pic -verify-machineinstrs < %s | \
 ; RUN:   FileCheck %s -check-prefixes=ALL,MIPS32-ANY,HAS-SEB-SEH,CHECK-EL,NOT-MICROMIPS
 ; RUN: llc -march=mipsel --disable-machine-licm -mcpu=mips32r6 -relocation-model=pic -verify-machineinstrs < %s | \
 ; RUN:   FileCheck %s -check-prefixes=ALL,MIPS32-ANY,HAS-SEB-SEH,CHECK-EL,MIPSR6
-; RUN: llc -march=mips64el --disable-machine-licm -mcpu=mips4 -relocation-model=pic < %s | \
+; RUN: llc -march=mipsel --disable-machine-licm -O0 -mcpu=mips32r6 -relocation-model=pic -verify-machineinstrs < %s | \
+; RUN:   FileCheck %s -check-prefixes=ALL,MIPS32-ANY,HAS-SEB-SEH,CHECK-EL,MIPSR6
+; RUN: llc -march=mips64el --disable-machine-licm -mcpu=mips4 -relocation-model=pic -verify-machineinstrs < %s | \
 ; RUN:   FileCheck %s -check-prefixes=ALL,MIPS64-ANY,NO-SEB-SEH,CHECK-EL,NOT-MICROMIPS
-; RUN: llc -march=mips64el --disable-machine-licm -mcpu=mips64 -relocation-model=pic < %s | \
+; RUN: llc -march=mips64el --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | \
 ; RUN:   FileCheck %s -check-prefixes=ALL,MIPS64-ANY,NO-SEB-SEH,CHECK-EL,NOT-MICROMIPS
 ; RUN: llc -march=mips64el --disable-machine-licm -mcpu=mips64r2 -relocation-model=pic -verify-machineinstrs < %s | \
 ; RUN:   FileCheck %s -check-prefixes=ALL,MIPS64-ANY,HAS-SEB-SEH,CHECK-EL,NOT-MICROMIPS
-; RUN: llc -march=mips64el --disable-machine-licm -mcpu=mips64r6 -relocation-model=pic < %s | \
+; RUN: llc -march=mips64el --disable-machine-licm -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs < %s | \
 ; RUN:   FileCheck %s -check-prefixes=ALL,MIPS64-ANY,HAS-SEB-SEH,CHECK-EL,MIPSR6
 ; RUN: llc -march=mips64 -O0 -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs < %s | \
-; RUN:   FileCheck %s -check-prefixes=ALL-LABEL,MIPS64-ANY,O0
-; RUN: llc -march=mipsel --disable-machine-licm -mcpu=mips32r2 -mattr=micromips -relocation-model=pic < %s | \
+; RUN:   FileCheck %s -check-prefixes=ALL-LABEL,MIPS64-ANY,MIPSR6,ALL,CHECK-EB,HAS-SEB-SEH
+; RUN: llc -march=mipsel --disable-machine-licm -mcpu=mips32r2 -mattr=micromips -relocation-model=pic -verify-machineinstrs < %s | \
 ; RUN:   FileCheck %s -check-prefixes=ALL,MIPS32-ANY,HAS-SEB-SEH,CHECK-EL,MICROMIPS
 
+; We want to verify that the produced code is well formed at all optimization
+; levels; the remaining tests check correctness.
+; RUN: llc -march=mipsel -O1 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s > /dev/null
+; RUN: llc -march=mipsel -O2 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s > /dev/null
+; RUN: llc -march=mipsel -O3 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s > /dev/null
+
 ; Keep one big-endian check so that we don't reduce testing, but don't add more
 ; since endianness doesn't affect the body of the atomic operations.
 ; RUN: llc -march=mips --disable-machine-licm -mcpu=mips32 -relocation-model=pic < %s | \
@@ -34,13 +41,11 @@
 ; MIPS32-ANY: lw $[[R0:[0-9]+]], %got(x)
 ; MIPS64-ANY: ld $[[R0:[0-9]+]], %got_disp(x)(
 
-; O0: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
-; O0: ld $[[R1:[0-9]+]]
-; O0-NEXT: ll $[[R2:[0-9]+]], 0($[[R1]])
-
 ; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
 ; ALL: ll $[[R3:[0-9]+]], 0($[[R0]])
-; ALL: addu $[[R4:[0-9]+]], $[[R3]], $4
+; NOT-MICROMIPS: addu $[[R4:[0-9]+]], $[[R3]], ${{[24]}}
+; MICROMIPS: addu16 $[[R4:[0-9]+]], $[[R3]], ${{[24]}}
+; MIPSR6: addu $[[R4:[0-9]+]], $[[R3]], ${{[24]}}
 ; ALL: sc $[[R4]], 0($[[R0]])
 ; NOT-MICROMIPS: beqz $[[R4]], [[BB0]]
 ; MICROMIPS: beqzc $[[R4]], [[BB0]]
@@ -61,7 +66,7 @@
 ; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
 ; ALL: ll $[[R1:[0-9]+]], 0($[[R0]])
-; ALL: and $[[R3:[0-9]+]], $[[R1]], $4
+; ALL: and $[[R3:[0-9]+]], $[[R1]], ${{[24]}}
 ; ALL: nor $[[R2:[0-9]+]], $zero, $[[R3]]
 ; ALL: sc $[[R2]], 0($[[R0]])
 ; NOT-MICROMIPS: beqz $[[R2]], [[BB0]]
@@ -105,10 +110,10 @@
 ; MIPS64-ANY: ld $[[R0:[0-9]+]], %got_disp(x)(
 
 ; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
-; ALL: ll $2, 0($[[R0]])
-; NOT-MICROMIPS: bne $2, $4, [[BB1:(\$|\.L)[A-Z_0-9]+]]
-; MICROMIPS: bne $2, $4, [[BB1:(\$|\.L)[A-Z_0-9]+]]
-; MIPSR6: bnec $2, $4, [[BB1:(\$|\.L)[A-Z_0-9]+]]
+; ALL: ll $[[R1:[0-9]+]], 0($[[R0]])
+; NOT-MICROMIPS: bne $[[R1]], ${{[0-9]+}}, [[BB1:(\$|\.L)[A-Z_0-9]+]]
+; MICROMIPS: bne $[[R1]], ${{[0-9]+}}, [[BB1:(\$|\.L)[A-Z_0-9]+]]
+; MIPSR6: bnec $[[R1]], ${{[0-9]+}}, [[BB1:(\$|\.L)[A-Z_0-9]+]]
 ; ALL: sc $[[R2:[0-9]+]], 0($[[R0]])
 ; NOT-MICROMIPS: beqz $[[R2]], [[BB0]]
 ; MICROMIPS: beqzc $[[R2]], [[BB0]]
@@ -139,15 +144,13 @@
 ; ALL: ori $[[R6:[0-9]+]], $zero, 255
 ; ALL: sllv $[[R7:[0-9]+]], $[[R6]], $[[R5]]
 ; ALL: nor $[[R8:[0-9]+]], $zero, $[[R7]]
-; ALL: sllv $[[R9:[0-9]+]], $4, $[[R5]]
-
-; O0: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
-; O0: ld $[[R10:[0-9]+]]
-; O0-NEXT: ll $[[R11:[0-9]+]], 0($[[R10]])
+; ALL: sllv $[[R9:[0-9]+]], ${{[24]}}, $[[R5]]
 
 ; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
 ; ALL: ll $[[R12:[0-9]+]], 0($[[R2]])
-; ALL: addu $[[R13:[0-9]+]], $[[R12]], $[[R9]]
+; NOT-MICROMIPS: addu $[[R13:[0-9]+]], $[[R12]], $[[R9]]
+; MICROMIPS: addu16 $[[R13:[0-9]+]], $[[R12]], $[[R9]]
+; MIPSR6: addu $[[R13:[0-9]+]], $[[R12]], $[[R9]]
 ; ALL: and $[[R14:[0-9]+]], $[[R13]], $[[R7]]
 ; ALL: and $[[R15:[0-9]+]], $[[R12]], $[[R8]]
 ; ALL: or $[[R16:[0-9]+]], $[[R15]], $[[R14]]
@@ -184,19 +187,17 @@
 ; ALL: ori $[[R6:[0-9]+]], $zero, 255
 ; ALL: sllv $[[R7:[0-9]+]], $[[R6]], $[[R5]]
 ; ALL: nor $[[R8:[0-9]+]], $zero, $[[R7]]
-; ALL: sllv $[[R9:[0-9]+]], $4, $[[R5]]
-
-; O0: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
-; O0: ld $[[R10:[0-9]+]]
-; O0-NEXT: ll $[[R11:[0-9]+]], 0($[[R10]])
+; ALL: sllv $[[R9:[0-9]+]], ${{[24]}}, $[[R5]]
 
 ; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
-; ALL: ll $[[R12:[0-9]+]], 0($[[R2]])
-; ALL: subu $[[R13:[0-9]+]], $[[R12]], $[[R9]]
-; ALL: and $[[R14:[0-9]+]], $[[R13]], $[[R7]]
-; ALL: and $[[R15:[0-9]+]], $[[R12]], $[[R8]]
-; ALL: or $[[R16:[0-9]+]], $[[R15]], $[[R14]]
-; ALL: sc $[[R16]], 0($[[R2]])
+; ALL: ll $[[R12:[0-9]+]], 0($[[R2]])
+; NOT-MICROMIPS: subu $[[R13:[0-9]+]], $[[R12]], $[[R9]]
+; MICROMIPS: subu16 $[[R13:[0-9]+]], $[[R12]], $[[R9]]
+; MIPSR6: subu $[[R13:[0-9]+]], $[[R12]], $[[R9]]
+; ALL: and $[[R14:[0-9]+]], $[[R13]], $[[R7]]
+; ALL: and $[[R15:[0-9]+]], $[[R12]], $[[R8]]
+; ALL: or $[[R16:[0-9]+]], $[[R15]], $[[R14]]
+; ALL: sc $[[R16]], 0($[[R2]])
 ; NOT-MICROMIPS: beqz $[[R16]], [[BB0]]
 ; MICROMIPS: beqzc $[[R16]], [[BB0]]
 ; MIPSR6: beqzc $[[R16]], [[BB0]]
@@ -229,11 +230,7 @@
 ; ALL: ori $[[R6:[0-9]+]], $zero, 255
 ; ALL: sllv $[[R7:[0-9]+]], $[[R6]], $[[R5]]
 ; ALL: nor $[[R8:[0-9]+]], $zero, $[[R7]]
-; ALL: sllv $[[R9:[0-9]+]], $4, $[[R5]]
-
-; O0: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
-; O0: ld $[[R10:[0-9]+]]
-; O0-NEXT: ll $[[R11:[0-9]+]], 0($[[R10]])
+; ALL: sllv $[[R9:[0-9]+]], ${{[24]}}, $[[R5]]
 
 ; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
 ; ALL: ll $[[R12:[0-9]+]], 0($[[R2]])
@@ -275,7 +272,7 @@
 ; ALL: ori $[[R6:[0-9]+]], $zero, 255
 ; ALL: sllv $[[R7:[0-9]+]], $[[R6]], $[[R5]]
 ; ALL: nor $[[R8:[0-9]+]], $zero, $[[R7]]
-; ALL: sllv $[[R9:[0-9]+]], $4, $[[R5]]
+; ALL: sllv $[[R9:[0-9]+]], ${{[24]}}, $[[R5]]
 
 ; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
 ; ALL: ll $[[R10:[0-9]+]], 0($[[R2]])
@@ -317,9 +314,9 @@
 ; ALL: ori $[[R6:[0-9]+]], $zero, 255
 ; ALL: sllv $[[R7:[0-9]+]], $[[R6]], $[[R5]]
 ; ALL: nor $[[R8:[0-9]+]], $zero, $[[R7]]
-; ALL: andi $[[R9:[0-9]+]], $4, 255
+; ALL: andi $[[R9:[0-9]+]], ${{[0-9]+}}, 255
 ; ALL: sllv $[[R10:[0-9]+]], $[[R9]], $[[R5]]
-; ALL: andi $[[R11:[0-9]+]], $5, 255
+; ALL: andi $[[R11:[0-9]+]], ${{[0-9]+}}, 255
 ; ALL: sllv $[[R12:[0-9]+]], $[[R11]], $[[R5]]
 
 ; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
@@ -361,9 +358,9 @@
 ; ALL: ori $[[R6:[0-9]+]], $zero, 255
 ; ALL: sllv $[[R7:[0-9]+]], $[[R6]], $[[R5]]
 ; ALL: nor $[[R8:[0-9]+]], $zero, $[[R7]]
-; ALL: andi $[[R9:[0-9]+]], $5, 255
+; ALL: andi $[[R9:[0-9]+]], ${{[0-9]+}}, 255
 ; ALL: sllv $[[R10:[0-9]+]], $[[R9]], $[[R5]]
-; ALL: andi $[[R11:[0-9]+]], $6, 255
+; ALL: andi $[[R11:[0-9]+]], ${{[0-9]+}}, 255
 ; ALL: sllv $[[R12:[0-9]+]], $[[R11]], $[[R5]]
 
 ; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
@@ -387,16 +384,16 @@
 ; NO-SEB-SEH: sra $[[R19:[0-9]+]], $[[R18]], 24
 
 ; FIXME: -march=mips produces a redundant sign extension here...
-; NO-SEB-SEH: sll $[[R20:[0-9]+]], $5, 24
+; NO-SEB-SEH: sll $[[R20:[0-9]+]], ${{[0-9]+}}, 24
 ; NO-SEB-SEH: sra $[[R20]], $[[R20]], 24
 
 ; HAS-SEB-SEH: seb $[[R19:[0-9]+]], $[[R17]]
 
 ; FIXME: ...Leading to this split check.
 ; NO-SEB-SEH: xor $[[R21:[0-9]+]], $[[R19]], $[[R20]]
-; HAS-SEB-SEH: xor $[[R21:[0-9]+]], $[[R19]], $5
+; HAS-SEB-SEH: xor $[[R21:[0-9]+]], $[[R19]], ${{[0-9]+}}
 
-; ALL: sltiu $2, $[[R21]], 1
+; ALL: sltiu ${{[0-9]+}}, $[[R21]], 1
 }
 
 ; Check one i16 so that we cover the seh sign extend
@@ -421,15 +418,13 @@
 ; ALL: ori $[[R6:[0-9]+]], $zero, 65535
 ; ALL: sllv $[[R7:[0-9]+]], $[[R6]], $[[R5]]
 ; ALL: nor $[[R8:[0-9]+]], $zero, $[[R7]]
-; ALL: sllv $[[R9:[0-9]+]], $4, $[[R5]]
-
-; O0: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
-; O0: ld $[[R10:[0-9]+]]
-; O0-NEXT: ll $[[R11:[0-9]+]], 0($[[R10]])
+; ALL: sllv $[[R9:[0-9]+]], ${{[0-9]+}}, $[[R5]]
 
 ; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
 ; ALL: ll $[[R12:[0-9]+]], 0($[[R2]])
-; ALL: addu $[[R13:[0-9]+]], $[[R12]], $[[R9]]
+; NOT-MICROMIPS: addu $[[R13:[0-9]+]], $[[R12]], $[[R9]]
+; MICROMIPS: addu16 $[[R13:[0-9]+]], $[[R12]], $[[R9]]
+; MIPSR6: addu $[[R13:[0-9]+]], $[[R12]], $[[R9]]
 ; ALL: and $[[R14:[0-9]+]], $[[R13]], $[[R7]]
 ; ALL: and $[[R15:[0-9]+]], $[[R12]], $[[R8]]
 ; ALL: or $[[R16:[0-9]+]], $[[R15]], $[[R14]]
@@ -477,17 +472,17 @@
 ; ALL: srlv $[[R9:[0-9]+]], $[[R6]], $[[R5]]
 
-; NO-SEB-SEH: sll $[[R10:[0-9]+]], $[[R9]], 16
-; NO-SEB-SEH: sra $[[R11:[0-9]+]], $[[R10]], 16
+; NO-SEB-SEH: sll
+; NO-SEB-SEH: sra
 
-; NO-SEB-SEH: sll $[[R12:[0-9]+]], $[[R2]], 16
-; NO-SEB-SEH: sra $[[R13:[0-9]+]], $[[R12]], 16
+; NO-SEB-SEH: sll
+; NO-SEB-SEH: sra
 
-; HAS-SEB-SEH: seh $[[R11:[0-9]+]], $[[R9]]
-; HAS-SEB-SEH: seh $[[R13:[0-9]+]], $[[R2]]
+; HAS-SEB-SEH: seh
+; HAS-SEB-SEH: seh
 
-; ALL: xor $[[R12:[0-9]+]], $[[R11]], $[[R13]]
-; ALL: sltiu $3, $[[R12]], 1
+; ALL: xor
+; ALL: sltiu
 ; ALL: sync
 }
@@ -540,7 +535,9 @@
 ; ALL: addiu $[[PTR:[0-9]+]], $[[R0]], 1024
 ; ALL: [[BB0:(\$|\.L)[A-Z_0-9]+]]:
 ; ALL: ll $[[R1:[0-9]+]], 0($[[PTR]])
-; ALL: addu $[[R2:[0-9]+]], $[[R1]], $4
+; NOT-MICROMIPS: addu $[[R2:[0-9]+]], $[[R1]], ${{[0-9]+}}
+; MICROMIPS: addu16 $[[R2:[0-9]+]], $[[R1]], ${{[0-9]+}}
+; MIPSR6: addu $[[R2:[0-9]+]], $[[R1]], ${{[0-9]+}}
 ; ALL: sc $[[R2]], 0($[[PTR]])
 ; NOT-MICROMIPS: beqz $[[R2]], [[BB0]]
 ; MICROMIPS: beqzc $[[R2]], [[BB0]]
Index: test/CodeGen/Mips/atomicCmpSwapPW.ll
===================================================================
--- test/CodeGen/Mips/atomicCmpSwapPW.ll
+++ test/CodeGen/Mips/atomicCmpSwapPW.ll
@@ -10,8 +10,12 @@
 
 ; ALL: ll ${{[0-9]+}}, 0($[[R0]])
 
-define {i16, i1} @foo(i16* %addr, i16 signext %r, i16 zeroext %new) {
-  %res = cmpxchg i16* %addr, i16 %r, i16 %new seq_cst seq_cst
-  ret {i16, i1} %res
+@sym = external global i32 *
+
+define void @foo(i32 %new, i32 %old) {
+entry:
+  %0 = load i32 *, i32 ** @sym
+  cmpxchg i32 * %0, i32 %new, i32 %old seq_cst seq_cst
+  ret void
 }
Index: test/CodeGen/Mips/micromips-atomic.ll
===================================================================
--- test/CodeGen/Mips/micromips-atomic.ll
+++ test/CodeGen/Mips/micromips-atomic.ll
@@ -12,7 +12,7 @@
 ; CHECK: lw $[[R0:[0-9]+]], %got(x)
 ; CHECK: $[[BB0:[A-Z_0-9]+]]:
 ; CHECK: ll $[[R1:[0-9]+]], 0($[[R0]])
-; CHECK: addu $[[R2:[0-9]+]], $[[R1]], $4
+; CHECK: addu16 $[[R2:[0-9]+]], $[[R1]], $4
 ; CHECK: sc $[[R2]], 0($[[R0]])
 ; CHECK: beqzc $[[R2]], $[[BB0]]
 }