Index: lib/Target/Mips/CMakeLists.txt =================================================================== --- lib/Target/Mips/CMakeLists.txt +++ lib/Target/Mips/CMakeLists.txt @@ -27,6 +27,7 @@ MipsConstantIslandPass.cpp MipsDelaySlotFiller.cpp MipsFastISel.cpp + MipsHazardSchedule.cpp MipsInstrInfo.cpp MipsISelDAGToDAG.cpp MipsISelLowering.cpp Index: lib/Target/Mips/Mips.h =================================================================== --- lib/Target/Mips/Mips.h +++ lib/Target/Mips/Mips.h @@ -29,6 +29,7 @@ FunctionPass *createMipsModuleISelDagPass(MipsTargetMachine &TM); FunctionPass *createMipsOptimizePICCallPass(MipsTargetMachine &TM); FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM); + FunctionPass *createMipsHazardSchedule(MipsTargetMachine &tm); FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM); FunctionPass *createMipsConstantIslandPass(MipsTargetMachine &tm); } // end namespace llvm; Index: lib/Target/Mips/MipsDelaySlotFiller.cpp =================================================================== --- lib/Target/Mips/MipsDelaySlotFiller.cpp +++ lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -15,6 +15,7 @@ #include "Mips.h" #include "MipsInstrInfo.h" #include "MipsTargetMachine.h" +#include "MipsSEInstrInfo.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -507,23 +508,14 @@ // Replace Branch with the compact branch instruction. Iter Filler::replaceWithCompactBranch(MachineBasicBlock &MBB, Iter Branch, DebugLoc DL) { - const MipsInstrInfo *TII = - MBB.getParent()->getSubtarget().getInstrInfo(); - - unsigned NewOpcode = - (((unsigned) Branch->getOpcode()) == Mips::BEQ) ? 
Mips::BEQZC_MM - : Mips::BNEZC_MM; - - const MCInstrDesc &NewDesc = TII->get(NewOpcode); - MachineInstrBuilder MIB = BuildMI(MBB, Branch, DL, NewDesc); - - MIB.addReg(Branch->getOperand(0).getReg()); - MIB.addMBB(Branch->getOperand(2).getMBB()); + const MipsSubtarget &STI = MBB.getParent()->getSubtarget(); + const MipsSEInstrInfo *TII = + static_cast(STI.getInstrInfo()); - Iter tmpIter = Branch; - Branch = std::prev(Branch); - MBB.erase(tmpIter); + unsigned NewOpcode = TII->getEquivalentCompactForm(Branch); + Branch = TII->genInstrWithNewOpc(NewOpcode, Branch); + std::next(Branch)->eraseFromParent(); return Branch; } @@ -570,7 +562,8 @@ bool Changed = false; const MipsSubtarget &STI = MBB.getParent()->getSubtarget(); bool InMicroMipsMode = STI.inMicroMipsMode(); - const MipsInstrInfo *TII = STI.getInstrInfo(); + const MipsSEInstrInfo *TII = + static_cast(STI.getInstrInfo()); for (Iter I = MBB.begin(); I != MBB.end(); ++I) { if (!hasUnoccupiedSlot(&*I)) @@ -611,27 +604,25 @@ // If instruction is BEQ or BNE with one ZERO register, then instead of // adding NOP replace this instruction with the corresponding compact // branch instruction, i.e. BEQZC or BNEZC. - unsigned Opcode = I->getOpcode(); if (InMicroMipsMode) { - switch (Opcode) { - case Mips::BEQ: - case Mips::BNE: - if (((unsigned) I->getOperand(1).getReg()) == Mips::ZERO) { - I = replaceWithCompactBranch(MBB, I, I->getDebugLoc()); - continue; - } - break; - case Mips::JR: - case Mips::PseudoReturn: - case Mips::PseudoIndirectBranch: - // For microMIPS the PseudoReturn and PseudoIndirectBranch are allways - // expanded to JR_MM, so they can be replaced with JRC16_MM. 
- I = replaceWithCompactJump(MBB, I, I->getDebugLoc()); - continue; - default: - break; + if (TII->getEquivalentCompactForm(I)) { + I = replaceWithCompactBranch(MBB, I, I->getDebugLoc()); + continue; } + if (I->isIndirectBranch() || I->isReturn()) { + // For microMIPS the PseudoReturn and PseudoIndirectBranch are always + // expanded to JR_MM, so they can be replaced with JRC16_MM. + I = replaceWithCompactJump(MBB, I, I->getDebugLoc()); + continue; + } + } + // For MIPSR6 attempt to produce the corresponding compact (no delay slot) + // form of the branch. This should save putting in a NOP. + if ((STI.hasMips32r6()) && TII->getEquivalentCompactForm(I)) { + I = replaceWithCompactBranch(MBB, I, I->getDebugLoc()); + continue; } + // Bundle the NOP to the instruction with the delay slot. BuildMI(MBB, std::next(I), I->getDebugLoc(), TII->get(Mips::NOP)); MIBundleBuilder(MBB, I, std::next(I, 2)); Index: lib/Target/Mips/MipsHazardSchedule.cpp =================================================================== --- /dev/null +++ lib/Target/Mips/MipsHazardSchedule.cpp @@ -0,0 +1,134 @@ +//===-- MipsHazardSchedule.cpp - Workaround pipeline hazards---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +// This pass is used to workaround certain pipeline hazards. For now, this covers +// compact branch hazards. In future this pass can be extended to other pipeline +// hazards, such as various MIPS1 hazards, processor errata that require +// instruction reorganization, etc. +// +// This pass has to run after the delay slot filler as that pass can introduce +// pipeline hazards, hence the existing hazard recognizer is not suitable. +// +// Hazards handled: forbidden slots for MIPSR6. 
+// +// A forbidden slot hazard occurs when a compact branch instruction is executed +// and the adjacent instruction in memory is a control transfer instruction such +// as a branch or jump, ERET, ERETNC, DERET, WAIT and PAUSE. +// +// For example: +// +// 0x8004 bnec a1,v0, +// 0x8008 beqc a1,a2, +// +// In such cases, the processor is required to signal a Reserved Instruction +// exception. +// +// Here, if the instruction at 0x8004 is executed, the processor will raise an +// exception as there is a control transfer instruction at 0x8008. +// +// There are two sources of forbidden slot hazards: +// +// A) A previous pass has created a compact branch directly. +// B) Transforming a delay slot branch into compact branch. This case can be +// difficult to process as lookahead for hazards is insufficient, as +// backwards delay slot filling can also produce hazards in previously +// processed instructions. +// + +#include "Mips.h" +#include "MipsInstrInfo.h" +#include "MipsSEInstrInfo.h" +#include "MipsTargetMachine.h" +#include "llvm/IR/Function.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "mips-hazard-schedule" + +STATISTIC(NumInsertedNops, "Number of nops inserted"); + +namespace { + +typedef MachineBasicBlock::iterator Iter; +typedef MachineBasicBlock::reverse_iterator ReverseIter; + +class MipsHazardSchedule : public MachineFunctionPass { + +public: + MipsHazardSchedule(TargetMachine &tm) : MachineFunctionPass(ID), TM(tm) {} + + const char *getPassName() const override { return "Mips Hazard Schedule"; } + + bool runOnMachineFunction(MachineFunction &F) override; + +private: + static char ID; + const TargetMachine &TM; +}; + +char MipsHazardSchedule::ID = 0; +} // end 
of anonymous namespace + +// createMipsHazardSchedule - Returns a pass that clears pipeline hazards. +FunctionPass *llvm::createMipsHazardSchedule(MipsTargetMachine &tm) { + return new MipsHazardSchedule(tm); +} + +bool MipsHazardSchedule::runOnMachineFunction(MachineFunction &MF) { + + const MipsSubtarget *STI = + &static_cast<const MipsSubtarget &>(MF.getSubtarget()); + + // Forbidden slot hazards are only defined for MIPSR6. + if (!STI->hasMips32r6() || STI->inMicroMipsMode()) + return false; + + bool Changed = false; + const MipsInstrInfo *TII = STI->getInstrInfo(); + + for (MachineFunction::iterator FI = MF.begin(); FI != MF.end(); ++FI) { + for (Iter I = (*FI).begin(); I != (*FI).end(); ++I) { + + // Forbidden slot hazard handling. Use lookahead over state. + if (!TII->HasForbiddenSlot(&*I)) + continue; + + // Next instruction in the basic block. + if (std::next(I) != (*FI).end() && + !TII->SafeInForbiddenSlot(&*std::next(I))) { + BuildMI(*FI, std::next(I), I->getDebugLoc(), TII->get(Mips::NOP)); + Changed = true; + MIBundleBuilder(*FI, I, std::next(I, 2)); + NumInsertedNops++; + continue; + } + + // Block ends here: check the first instruction of the layout successor. + for (auto *Succ : (*FI).successors()) { + if (std::next(I) == (*FI).end() && FI->isLayoutSuccessor(Succ) && + Succ->getFirstNonDebugInstr() != Succ->end() && + !TII->SafeInForbiddenSlot(Succ->getFirstNonDebugInstr())) { + BuildMI(&(*FI), I->getDebugLoc(), TII->get(Mips::NOP)); + Changed = true; + MIBundleBuilder(*FI, I, std::next(I, 2)); + NumInsertedNops++; + } + } + } + } + return Changed; +} Index: lib/Target/Mips/MipsISelLowering.cpp =================================================================== --- lib/Target/Mips/MipsISelLowering.cpp +++ lib/Target/Mips/MipsISelLowering.cpp @@ -1064,6 +1064,8 @@ DebugLoc DL = MI->getDebugLoc(); unsigned LL, SC, AND, NOR, ZERO, BEQ; + // FIXME: The below code should check for the ISA to emit the correct 64bit + // operations when the size is 4. 
if (Size == 4) { if (isMicroMips) { LL = Mips::LL_MM; Index: lib/Target/Mips/MipsInstrInfo.h =================================================================== --- lib/Target/Mips/MipsInstrInfo.h +++ lib/Target/Mips/MipsInstrInfo.h @@ -71,6 +71,11 @@ bool AllowModify, SmallVectorImpl &BranchInstrs) const; + /// Forbidden slot analysis + bool SafeInForbiddenSlot(const MachineInstr *MI) const; + + bool HasForbiddenSlot(const MachineInstr *MI) const; + /// Insert nop instruction when hazard condition is found void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override; Index: lib/Target/Mips/MipsInstrInfo.cpp =================================================================== --- lib/Target/Mips/MipsInstrInfo.cpp +++ lib/Target/Mips/MipsInstrInfo.cpp @@ -257,6 +257,49 @@ return BT_CondUncond; } +/// Predicate for distinguishing between control transfer instructions and all +/// other instructions for handling forbidden slots. Consider inline assembly +/// as unsafe as well. +bool MipsInstrInfo::SafeInForbiddenSlot(const MachineInstr *MI) const { + if (MI->isCall() || MI->isBranch() || MI->isReturn() || MI->isInlineAsm()) + return false; + + switch (MI->getOpcode()) { + case Mips::ERET: + case Mips::ERETNC: + case Mips::DERET: + case Mips::PAUSE: + case Mips::WAIT: + return false; + default: + return true; + } +} + +/// Predicate for distinguishing instructions that have forbidden slots. +bool MipsInstrInfo::HasForbiddenSlot(const MachineInstr *MI) const { + if (!MI->isBranch()) + return false; + + switch (MI->getOpcode()) { + case Mips::BEQC: + case Mips::BNEC: + case Mips::BLTC: + case Mips::BGEC: + case Mips::BLTUC: + case Mips::BGEUC: + case Mips::BEQZC: + case Mips::BNEZC: + case Mips::BGEZC: + case Mips::BGTZC: + case Mips::BLEZC: + case Mips::BLTZC: + return true; + default: + return false; + } +} + /// Return the number of bytes of code the specified instruction may be. 
unsigned MipsInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { switch (MI->getOpcode()) { @@ -278,10 +321,45 @@ MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc, MachineBasicBlock::iterator I) const { MachineInstrBuilder MIB; + bool ZeroOperandBranch = false; + + // Certain branches have two forms: e.g beq $1, $zero, dst vs beqz $1, dest + // Pick the zero form of the branch for readable assembly and for greater + // branch distance in non-microMIPS mode. + if (I->isBranch() && I->getOperand(1).isReg() && + // FIXME: Certain atomic sequences on mips64 generate 32bit references to + // Mips::ZERO, which is incorrect. This test should be updated to use + // Subtarget.getABI().GetZeroReg() when those atomic sequences and others + // are fixed. + I->getOperand(1).getReg() == Mips::ZERO) { + ZeroOperandBranch = true; + switch (NewOpc) { + case Mips::BEQC: + NewOpc = Mips::BEQZC; + break; + case Mips::BNEC: + NewOpc = Mips::BNEZC; + break; + case Mips::BGEC: + NewOpc = Mips::BGEZC; + break; + case Mips::BLTC: + NewOpc = Mips::BLTZC; + break; + case Mips::BNEZC_MM: + case Mips::BEQZC_MM: + break; + default: + ZeroOperandBranch = false; + break; + } + } + MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), get(NewOpc)); for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) - MIB.addOperand(I->getOperand(J)); + if (!(ZeroOperandBranch && (J == 1))) + MIB.addOperand(I->getOperand(J)); MIB.setMemRefs(I->memoperands_begin(), I->memoperands_end()); return MIB; Index: lib/Target/Mips/MipsSEInstrInfo.h =================================================================== --- lib/Target/Mips/MipsSEInstrInfo.h +++ lib/Target/Mips/MipsSEInstrInfo.h @@ -66,6 +66,8 @@ unsigned getOppositeBranchOpc(unsigned Opc) const override; + unsigned getEquivalentCompactForm(MachineBasicBlock::iterator I) const; + /// Adjust SP by Amount bytes. 
void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const override; Index: lib/Target/Mips/MipsSEInstrInfo.cpp =================================================================== --- lib/Target/Mips/MipsSEInstrInfo.cpp +++ lib/Target/Mips/MipsSEInstrInfo.cpp @@ -420,9 +420,66 @@ case Mips::BC1F: return Mips::BC1T; case Mips::BEQZC_MM: return Mips::BNEZC_MM; case Mips::BNEZC_MM: return Mips::BEQZC_MM; + case Mips::BEQZC: return Mips::BNEZC; + case Mips::BNEZC: return Mips::BEQZC; + case Mips::BEQC: return Mips::BNEC; + case Mips::BNEC: return Mips::BEQC; + case Mips::BGTZC: return Mips::BLEZC; + case Mips::BGEZC: return Mips::BLTZC; + case Mips::BLTZC: return Mips::BGEZC; + case Mips::BLEZC: return Mips::BGTZC; } } +/// getEquivalentCompactForm - Return the corresponding compact form of +/// a branch. +unsigned MipsSEInstrInfo::getEquivalentCompactForm(MachineBasicBlock::iterator I) const { + unsigned Opcode = I->getOpcode(); + bool canUseMicroMipsBranches = + Subtarget.inMicroMipsMode() && + (Opcode == Mips::BNE || Opcode == Mips::BEQ) && + I->getOperand(1).getReg() == Subtarget.getABI().GetZeroReg(); + + if (Subtarget.hasMips32r6() || canUseMicroMipsBranches) { + switch (Opcode) { + case Mips::B: + return Mips::BC; + case Mips::BAL: + return Mips::BALC; + case Mips::BEQ: + if (canUseMicroMipsBranches) + return Mips::BEQZC_MM; + else + return Mips::BEQC; + case Mips::BNE: + if (canUseMicroMipsBranches) + return Mips::BNEZC_MM; + else + return Mips::BNEC; + case Mips::BGE: + return Mips::BGEC; + case Mips::BGEU: + return Mips::BGEUC; + case Mips::BGEZ: + return Mips::BGEZC; + case Mips::BGTZ: + return Mips::BGTZC; + case Mips::BLEZ: + return Mips::BLEZC; + case Mips::BLT: + return Mips::BLTC; + case Mips::BLTU: + return Mips::BLTUC; + case Mips::BLTZ: + return Mips::BLTZC; + default: + return 0; + } + } + + return 0; +} + /// Adjust SP by Amount bytes. 
void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, @@ -493,8 +550,12 @@ Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 || Opc == Mips::BGEZ64 || Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 || Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::B || - Opc == Mips::J || Opc == Mips::BEQZC_MM || Opc == Mips::BNEZC_MM) ? - Opc : 0; + Opc == Mips::J || Opc == Mips::BEQZC_MM || Opc == Mips::BNEZC_MM || + Opc == Mips::BEQC || Opc == Mips::BNEC || Opc == Mips::BLTC || + Opc == Mips::BGEC || Opc == Mips::BLTUC || Opc == Mips::BGEUC || + Opc == Mips::BGTZC || Opc == Mips::BLEZC || Opc == Mips::BGEZC || + Opc == Mips::BLTZC || Opc == Mips::BEQZC || Opc == Mips::BNEZC || + Opc == Mips::BC) ? Opc : 0; } void MipsSEInstrInfo::expandRetRA(MachineBasicBlock &MBB, Index: lib/Target/Mips/MipsTargetMachine.cpp =================================================================== --- lib/Target/Mips/MipsTargetMachine.cpp +++ lib/Target/Mips/MipsTargetMachine.cpp @@ -250,7 +250,13 @@ // print out the code after the passes. void MipsPassConfig::addPreEmitPass() { MipsTargetMachine &TM = getMipsTargetMachine(); + + // The delay slot filler pass can potentially create forbidden slot (FS) + // hazards for MIPSR6 which the hazard schedule pass (HSP) will fix. Any + // (new) pass that creates compact branches after the HSP must handle FS + // hazards itself or be pipelined before the HSP. 
addPass(createMipsDelaySlotFillerPass(TM)); + addPass(createMipsHazardSchedule(TM)); addPass(createMipsLongBranchPass(TM)); addPass(createMipsConstantIslandPass(TM)); } Index: test/CodeGen/Mips/analyzebranch.ll =================================================================== --- test/CodeGen/Mips/analyzebranch.ll +++ test/CodeGen/Mips/analyzebranch.ll @@ -19,7 +19,7 @@ ; GPR: cmp.lt.d $[[FGRCC:f[0-9]+]], $[[Z]], $f12 ; GPR: mfc1 $[[GPRCC:[0-9]+]], $[[FGRCC]] ; GPR-NOT: not $[[GPRCC]], $[[GPRCC]] -; GPR: bnez $[[GPRCC]], $BB +; GPR: bnezc $[[GPRCC]], $BB %cmp = fcmp ogt double %a, 0.000000e+00 br i1 %cmp, label %if.end6, label %if.else @@ -50,7 +50,8 @@ ; GPR: cmp.eq.s $[[FGRCC:f[0-9]+]], $f12, $[[Z]] ; GPR: mfc1 $[[GPRCC:[0-9]+]], $[[FGRCC]] ; GPR-NOT: not $[[GPRCC]], $[[GPRCC]] -; GPR: beqz $[[GPRCC]], $BB +; 64-GPR beqzc $[[GPRCC]], $BB +; 32-GPR beqz $[[GPRCC]], $BB %cmp = fcmp une float %f, 0.000000e+00 br i1 %cmp, label %if.then, label %if.end Index: test/CodeGen/Mips/atomic.ll =================================================================== --- test/CodeGen/Mips/atomic.ll +++ test/CodeGen/Mips/atomic.ll @@ -1,10 +1,10 @@ ; RUN: llc -march=mipsel --disable-machine-licm -mcpu=mips32 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS32-ANY -check-prefix=NO-SEB-SEH -check-prefix=CHECK-EL -check-prefix=NOT-MICROMIPS ; RUN: llc -march=mipsel --disable-machine-licm -mcpu=mips32r2 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS32-ANY -check-prefix=HAS-SEB-SEH -check-prefix=CHECK-EL -check-prefix=NOT-MICROMIPS -; RUN: llc -march=mipsel --disable-machine-licm -mcpu=mips32r6 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS32-ANY -check-prefix=HAS-SEB-SEH -check-prefix=CHECK-EL -check-prefix=NOT-MICROMIPS +; RUN: llc -march=mipsel --disable-machine-licm -mcpu=mips32r6 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS32-ANY -check-prefix=HAS-SEB-SEH -check-prefix=CHECK-EL -check-prefix=MIPSR6 ; RUN: llc -march=mips64el 
--disable-machine-licm -mcpu=mips4 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS64-ANY -check-prefix=NO-SEB-SEH -check-prefix=CHECK-EL -check-prefix=NOT-MICROMIPS ; RUN: llc -march=mips64el --disable-machine-licm -mcpu=mips64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS64-ANY -check-prefix=NO-SEB-SEH -check-prefix=CHECK-EL -check-prefix=NOT-MICROMIPS ; RUN: llc -march=mips64el --disable-machine-licm -mcpu=mips64r2 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS64-ANY -check-prefix=HAS-SEB-SEH -check-prefix=CHECK-EL -check-prefix=NOT-MICROMIPS -; RUN: llc -march=mips64el --disable-machine-licm -mcpu=mips64r6 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS64-ANY -check-prefix=HAS-SEB-SEH -check-prefix=CHECK-EL -check-prefix=NOT-MICROMIPS +; RUN: llc -march=mips64el --disable-machine-licm -mcpu=mips64r6 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS64-ANY -check-prefix=HAS-SEB-SEH -check-prefix=CHECK-EL -check-prefix=MIPSR6 ; RUN: llc -march=mipsel --disable-machine-licm -mcpu=mips32r2 -mattr=micromips < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS32-ANY -check-prefix=HAS-SEB-SEH -check-prefix=CHECK-EL -check-prefix=MICROMIPS ; Keep one big-endian check so that we don't reduce testing, but don't add more @@ -29,6 +29,7 @@ ; ALL: sc $[[R2]], 0($[[R0]]) ; NOT-MICROMIPS: beqz $[[R2]], $[[BB0]] ; MICROMIPS: beqzc $[[R2]], $[[BB0]] +; MIPSR6: beqzc $[[R2]], $[[BB0]] } define i32 @AtomicLoadNand32(i32 signext %incr) nounwind { @@ -48,6 +49,7 @@ ; ALL: sc $[[R2]], 0($[[R0]]) ; NOT-MICROMIPS: beqz $[[R2]], $[[BB0]] ; MICROMIPS: beqzc $[[R2]], $[[BB0]] +; MIPSR6: beqzc $[[R2]], $[[BB0]] } define i32 @AtomicSwap32(i32 signext %newval) nounwind { @@ -68,6 +70,7 @@ ; ALL: sc $[[R2:[0-9]+]], 0($[[R0]]) ; NOT-MICROMIPS: beqz $[[R2]], $[[BB0]] ; MICROMIPS: beqzc $[[R2]], $[[BB0]] +; MIPSR6: beqzc $[[R2]], $[[BB0]] } define i32 @AtomicCmpSwap32(i32 signext %oldval, i32 signext %newval) nounwind { @@ -86,10 +89,13 @@ ; ALL: 
$[[BB0:[A-Z_0-9]+]]: ; ALL: ll $2, 0($[[R0]]) -; ALL: bne $2, $4, $[[BB1:[A-Z_0-9]+]] +; NOT-MICROMIPS: bne $2, $4, $[[BB1:[A-Z_0-9]+]] +; MICROMIPS: bne $2, $4, $[[BB1:[A-Z_0-9]+]] +; MIPSR6: bnec $2, $4, $[[BB1:[A-Z_0-9]+]] ; ALL: sc $[[R2:[0-9]+]], 0($[[R0]]) ; NOT-MICROMIPS: beqz $[[R2]], $[[BB0]] ; MICROMIPS: beqzc $[[R2]], $[[BB0]] +; MIPSR6: beqzc $[[R2]], $[[BB0]] ; ALL: $[[BB1]]: } @@ -127,6 +133,7 @@ ; ALL: sc $[[R14]], 0($[[R2]]) ; NOT-MICROMIPS: beqz $[[R14]], $[[BB0]] ; MICROMIPS: beqzc $[[R14]], $[[BB0]] +; MIPSR6: beqzc $[[R14]], $[[BB0]] ; ALL: and $[[R15:[0-9]+]], $[[R10]], $[[R7]] ; ALL: srlv $[[R16:[0-9]+]], $[[R15]], $[[R5]] @@ -167,6 +174,7 @@ ; ALL: sc $[[R14]], 0($[[R2]]) ; NOT-MICROMIPS: beqz $[[R14]], $[[BB0]] ; MICROMIPS: beqzc $[[R14]], $[[BB0]] +; MIPSR6: beqzc $[[R14]], $[[BB0]] ; ALL: and $[[R15:[0-9]+]], $[[R10]], $[[R7]] ; ALL: srlv $[[R16:[0-9]+]], $[[R15]], $[[R5]] @@ -208,6 +216,7 @@ ; ALL: sc $[[R14]], 0($[[R2]]) ; NOT-MICROMIPS: beqz $[[R14]], $[[BB0]] ; MICROMIPS: beqzc $[[R14]], $[[BB0]] +; MIPSR6: beqzc $[[R14]], $[[BB0]] ; ALL: and $[[R15:[0-9]+]], $[[R10]], $[[R7]] ; ALL: srlv $[[R16:[0-9]+]], $[[R15]], $[[R5]] @@ -247,6 +256,7 @@ ; ALL: sc $[[R14]], 0($[[R2]]) ; NOT-MICROMIPS: beqz $[[R14]], $[[BB0]] ; MICROMIPS: beqzc $[[R14]], $[[BB0]] +; MIPSR6: beqzc $[[R14]], $[[BB0]] ; ALL: and $[[R15:[0-9]+]], $[[R10]], $[[R7]] ; ALL: srlv $[[R16:[0-9]+]], $[[R15]], $[[R5]] @@ -286,13 +296,16 @@ ; ALL: $[[BB0:[A-Z_0-9]+]]: ; ALL: ll $[[R13:[0-9]+]], 0($[[R2]]) ; ALL: and $[[R14:[0-9]+]], $[[R13]], $[[R7]] -; ALL: bne $[[R14]], $[[R10]], $[[BB1:[A-Z_0-9]+]] +; NOT-MICROMIPS: bne $[[R14]], $[[R10]], $[[BB1:[A-Z_0-9]+]] +; MICROMIPS: bne $[[R14]], $[[R10]], $[[BB1:[A-Z_0-9]+]] +; MIPSR6: bnec $[[R14]], $[[R10]], $[[BB1:[A-Z_0-9]+]] ; ALL: and $[[R15:[0-9]+]], $[[R13]], $[[R8]] ; ALL: or $[[R16:[0-9]+]], $[[R15]], $[[R12]] ; ALL: sc $[[R16]], 0($[[R2]]) ; NOT-MICROMIPS: beqz $[[R16]], $[[BB0]] ; MICROMIPS: beqzc $[[R16]], $[[BB0]] +; 
MIPSR6: beqzc $[[R16]], $[[BB0]] ; ALL: $[[BB1]]: ; ALL: srlv $[[R17:[0-9]+]], $[[R14]], $[[R5]] @@ -327,13 +340,16 @@ ; ALL: $[[BB0:[A-Z_0-9]+]]: ; ALL: ll $[[R13:[0-9]+]], 0($[[R2]]) ; ALL: and $[[R14:[0-9]+]], $[[R13]], $[[R7]] -; ALL: bne $[[R14]], $[[R10]], $[[BB1:[A-Z_0-9]+]] +; NOT-MICROMIPS: bne $[[R14]], $[[R10]], $[[BB1:[A-Z_0-9]+]] +; MICROMIPS: bne $[[R14]], $[[R10]], $[[BB1:[A-Z_0-9]+]] +; MIPSR6: bnec $[[R14]], $[[R10]], $[[BB1:[A-Z_0-9]+]] ; ALL: and $[[R15:[0-9]+]], $[[R13]], $[[R8]] ; ALL: or $[[R16:[0-9]+]], $[[R15]], $[[R12]] ; ALL: sc $[[R16]], 0($[[R2]]) ; NOT-MICROMIPS: beqz $[[R16]], $[[BB0]] ; MICROMIPS: beqzc $[[R16]], $[[BB0]] +; MIPSR6: beqzc $[[R16]], $[[BB0]] ; ALL: $[[BB1]]: ; ALL: srlv $[[R17:[0-9]+]], $[[R14]], $[[R5]] @@ -380,6 +396,7 @@ ; ALL: sc $[[R14]], 0($[[R2]]) ; NOT-MICROMIPS: beqz $[[R14]], $[[BB0]] ; MICROMIPS: beqzc $[[R14]], $[[BB0]] +; MIPSR6: beqzc $[[R14]], $[[BB0]] ; ALL: and $[[R15:[0-9]+]], $[[R10]], $[[R7]] ; ALL: srlv $[[R16:[0-9]+]], $[[R15]], $[[R5]] @@ -444,4 +461,5 @@ ; ALL: sc $[[R2]], 0($[[PTR]]) ; NOT-MICROMIPS: beqz $[[R2]], $[[BB0]] ; MICROMIPS: beqzc $[[R2]], $[[BB0]] +; MIPSR6: beqzc $[[R2]], $[[BB0]] } Index: test/CodeGen/Mips/compact-branches.ll =================================================================== --- /dev/null +++ test/CodeGen/Mips/compact-branches.ll @@ -0,0 +1,155 @@ +; RUN: llc -march=mipsel -mcpu=mips32r6 -relocation-model=static < %s | FileCheck %s + +; Function Attrs: nounwind +define void @l() { +entry: + %call = tail call i32 @k() + %call1 = tail call i32 @j() + %cmp = icmp eq i32 %call, %call1 +; CHECK: bnec + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry: +; CHECK: nop +; CHECK: jal + tail call void @f(i32 signext -2) + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +declare i32 @k() + +declare i32 @j() + +declare void @f(i32 signext) + +; Function Attrs: define void @l2() { +define void @l2() { +entry: + %call = tail call i32 
@k() + %call1 = tail call i32 @i() + %cmp = icmp eq i32 %call, %call1 +; CHECK: beqc + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry: +; CHECK: nop +; CHECK: jal + tail call void @f(i32 signext -1) + br label %if.end + +if.end: ; preds = %entry, %if.then + ret void +} + +declare i32 @i() + +; Function Attrs: nounwind +define void @l3() { +entry: + %call = tail call i32 @k() + %cmp = icmp slt i32 %call, 0 +; CHECK: bgez + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry: +; CHECK: nop +; CHECK: jal + tail call void @f(i32 signext 0) + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +; Function Attrs: nounwind +define void @l4() { +entry: + %call = tail call i32 @k() + %cmp = icmp slt i32 %call, 1 +; CHECK: bgtzc + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry: +; CHECK: nop +; CHECK: jal + tail call void @f(i32 signext 1) + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +; Function Attrs: nounwind +define void @l5() { +entry: + %call = tail call i32 @k() + %cmp = icmp sgt i32 %call, 0 +; CHECK: blezc + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry: +; CHECK: nop +; CHECK: jal + tail call void @f(i32 signext 2) + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +; Function Attrs: nounwind +define void @l6() { +entry: + %call = tail call i32 @k() + %cmp = icmp sgt i32 %call, -1 +; CHECK: bltzc + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry: +; CHECK: nop +; CHECK: jal + tail call void @f(i32 signext 3) + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +; Function Attrs: nounwind +define void @l7() { +entry: + %call = tail call i32 @k() + %cmp = icmp eq i32 %call, 0 +; CHECK: bnezc + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry: +; CHECK: nop +; CHECK: jal + tail call void @f(i32 signext 4) + br label %if.end + +if.end: ; preds = 
%if.then, %entry + ret void +} + +; Function Attrs: nounwind +define void @l8() { +entry: + %call = tail call i32 @k() + %cmp = icmp eq i32 %call, 0 +; CHECK: beqzc + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry: +; CHECK: nop +; CHECK: jal + tail call void @f(i32 signext 5) + br label %if.end + +if.end: ; preds = %entry, %if.then + ret void +} Index: test/CodeGen/Mips/fcmp.ll =================================================================== --- test/CodeGen/Mips/fcmp.ll +++ test/CodeGen/Mips/fcmp.ll @@ -750,7 +750,7 @@ ; 32-CMP-DAG: mfc1 $[[T3:[0-9]+]], $[[T2]] ; FIXME: This instruction is redundant. ; 32-CMP-DAG: andi $[[T4:[0-9]+]], $[[T3]], 1 -; 32-CMP-DAG: bnez $[[T4]], +; 32-CMP-DAG: bnezc $[[T4]], ; 64-C-DAG: add.s $[[T0:f[0-9]+]], $f13, $f12 ; 64-C-DAG: lwc1 $[[T1:f[0-9]+]], %got_ofst($CPI32_0)( @@ -763,7 +763,7 @@ ; 64-CMP-DAG: mfc1 $[[T3:[0-9]+]], $[[T2]] ; FIXME: This instruction is redundant. ; 64-CMP-DAG: andi $[[T4:[0-9]+]], $[[T3]], 1 -; 64-CMP-DAG: bnez $[[T4]], +; 64-CMP-DAG: bnezc $[[T4]], %add = fadd fast float %at, %angle %cmp = fcmp ogt float %add, 1.000000e+00 @@ -794,7 +794,7 @@ ; 32-CMP-DAG: mfc1 $[[T3:[0-9]+]], $[[T2]] ; FIXME: This instruction is redundant. ; 32-CMP-DAG: andi $[[T4:[0-9]+]], $[[T3]], 1 -; 32-CMP-DAG: bnez $[[T4]], +; 32-CMP-DAG: bnezc $[[T4]], ; 64-C-DAG: add.d $[[T0:f[0-9]+]], $f13, $f12 ; 64-C-DAG: ldc1 $[[T1:f[0-9]+]], %got_ofst($CPI33_0)( @@ -807,7 +807,7 @@ ; 64-CMP-DAG: mfc1 $[[T3:[0-9]+]], $[[T2]] ; FIXME: This instruction is redundant. 
; 64-CMP-DAG: andi $[[T4:[0-9]+]], $[[T3]], 1 -; 64-CMP-DAG: bnez $[[T4]], +; 64-CMP-DAG: bnezc $[[T4]], %add = fadd fast double %at, %angle %cmp = fcmp ogt double %add, 1.000000e+00 Index: test/CodeGen/Mips/fpbr.ll =================================================================== --- test/CodeGen/Mips/fpbr.ll +++ test/CodeGen/Mips/fpbr.ll @@ -18,7 +18,8 @@ ; GPR: mfc1 $[[GPRCC:[0-9]+]], $[[FGRCC:f[0-9]+]] ; FIXME: We ought to be able to transform not+bnez -> beqz ; GPR: not $[[GPRCC]], $[[GPRCC]] -; GPR: bnez $[[GPRCC]], $BB0_2 +; 32-GPR: bnez $[[GPRCC]], $BB0_2 +; 64-GPR: bnezc $[[GPRCC]], $BB0_2 %cmp = fcmp oeq float %f2, %f3 br i1 %cmp, label %if.then, label %if.else @@ -51,7 +52,8 @@ ; 64-GPR: cmp.ule.s $[[FGRCC:f[0-9]+]], $f13, $f12 ; GPR: mfc1 $[[GPRCC:[0-9]+]], $[[FGRCC:f[0-9]+]] ; GPR-NOT: not $[[GPRCC]], $[[GPRCC]] -; GPR: bnez $[[GPRCC]], $BB1_2 +; 32-GPR: bnez $[[GPRCC]], $BB1_2 +; 64-GPR: bnezc $[[GPRCC]], $BB1_2 %cmp = fcmp olt float %f2, %f3 br i1 %cmp, label %if.then, label %if.else @@ -80,7 +82,8 @@ ; 64-GPR: cmp.ult.s $[[FGRCC:f[0-9]+]], $f13, $f12 ; GPR: mfc1 $[[GPRCC:[0-9]+]], $[[FGRCC:f[0-9]+]] ; GPR-NOT: not $[[GPRCC]], $[[GPRCC]] -; GPR: beqz $[[GPRCC]], $BB2_2 +; 32-GPR: beqz $[[GPRCC]], $BB2_2 +; 64-GPR: beqzc $[[GPRCC]], $BB2_2 %cmp = fcmp ugt float %f2, %f3 br i1 %cmp, label %if.else, label %if.then @@ -110,7 +113,8 @@ ; GPR: mfc1 $[[GPRCC:[0-9]+]], $[[FGRCC:f[0-9]+]] ; FIXME: We ought to be able to transform not+bnez -> beqz ; GPR: not $[[GPRCC]], $[[GPRCC]] -; GPR: bnez $[[GPRCC]], $BB3_2 +; 32-GPR: bnez $[[GPRCC]], $BB3_2 +; 64-GPR: bnezc $[[GPRCC]], $BB3_2 %cmp = fcmp oeq double %f2, %f3 br i1 %cmp, label %if.then, label %if.else @@ -139,7 +143,8 @@ ; 64-GPR: cmp.ule.d $[[FGRCC:f[0-9]+]], $f13, $f12 ; GPR: mfc1 $[[GPRCC:[0-9]+]], $[[FGRCC:f[0-9]+]] ; GPR-NOT: not $[[GPRCC]], $[[GPRCC]] -; GPR: bnez $[[GPRCC]], $BB4_2 +; 32-GPR: bnez $[[GPRCC]], $BB4_2 +; 64-GPR: bnezc $[[GPRCC]], $BB4_2 %cmp = fcmp olt double %f2, %f3 br i1 %cmp, 
label %if.then, label %if.else @@ -168,7 +173,8 @@ ; 64-GPR: cmp.ult.d $[[FGRCC:f[0-9]+]], $f13, $f12 ; GPR: mfc1 $[[GPRCC:[0-9]+]], $[[FGRCC:f[0-9]+]] ; GPR-NOT: not $[[GPRCC]], $[[GPRCC]] -; GPR: beqz $[[GPRCC]], $BB5_2 +; 32-GPR: beqz $[[GPRCC]], $BB5_2 +; 64-GPR: beqzc $[[GPRCC]], $BB5_2 %cmp = fcmp ugt double %f2, %f3 br i1 %cmp, label %if.else, label %if.then