Index: lib/Target/Sparc/DelaySlotFiller.cpp =================================================================== --- lib/Target/Sparc/DelaySlotFiller.cpp +++ lib/Target/Sparc/DelaySlotFiller.cpp @@ -41,7 +41,9 @@ const SparcSubtarget *Subtarget; static char ID; - Filler() : MachineFunctionPass(ID) {} + Filler() : MachineFunctionPass(ID) { + initializeFillerPass(*PassRegistry::getPassRegistry()); + } StringRef getPassName() const override { return "SPARC Delay Slot Filler"; } @@ -93,6 +95,8 @@ char Filler::ID = 0; } // end of anonymous namespace +INITIALIZE_PASS(Filler, DEBUG_TYPE, "Fill delay slot for Sparc", false, false) + /// createSparcDelaySlotFillerPass - Returns a pass that fills in delay /// slots in Sparc MachineFunctions /// @@ -282,6 +286,20 @@ Opcode >= SP::FDIVD && Opcode <= SP::FSQRTD) return true; + if (Subtarget->fixTN0009() && candidate->mayStore()) + return true; + + if (Subtarget->fixTN0013()) { + switch (Opcode) { + case SP::FDIVS: + case SP::FDIVD: + case SP::FSQRTS: + case SP::FSQRTD: + return true; + default: + break; + } + } return false; } Index: lib/Target/Sparc/LeonFeatures.td =================================================================== --- lib/Target/Sparc/LeonFeatures.td +++ lib/Target/Sparc/LeonFeatures.td @@ -58,3 +58,31 @@ "true", "LEON erratum fix: Fix FDIVS/FDIVD/FSQRTS/FSQRTD instructions with NOPs and floating-point store" >; + +def FixTN0009 : SubtargetFeature< + "fix-tn0009", + "FixTN0009", + "true", + "Enable workaround for errata described in GRLIB-TN-0009" +>; + +def FixTN0011 : SubtargetFeature< + "fix-tn0011", + "FixTN0011", + "true", + "Enable workaround for errata described in GRLIB-TN-0011" +>; + +def FixTN0012 : SubtargetFeature< + "fix-tn0012", + "FixTN0012", + "true", + "Enable workaround for errata described in GRLIB-TN-0012" +>; + +def FixTN0013 : SubtargetFeature< + "fix-tn0013", + "FixTN0013", + "true", + "Enable workaround for errata described in GRLIB-TN-0013" +>; Index: lib/Target/Sparc/LeonPasses.cpp 
=================================================================== --- lib/Target/Sparc/LeonPasses.cpp +++ lib/Target/Sparc/LeonPasses.cpp @@ -19,8 +19,256 @@ #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/raw_ostream.h" + using namespace llvm; +struct ErrataWorkaround : public MachineFunctionPass { + const SparcSubtarget *Subtarget; + const TargetInstrInfo *TII; + static char ID; + + ErrataWorkaround() : MachineFunctionPass(ID) { + initializeErrataWorkaroundPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + bool checkSeqTN0009A(MachineBasicBlock::iterator I); + bool checkSeqTN0009B(MachineBasicBlock::iterator I); + bool checkSeqTN0012(MachineBasicBlock::iterator I); + bool checkSeqTN0013(MachineBasicBlock::iterator I); + + bool moveNext(MachineBasicBlock::iterator &I); + bool isFloat(MachineBasicBlock::iterator I); + bool isDivSqrt(MachineBasicBlock::iterator I); + void insertNop(MachineBasicBlock::iterator I); + StringRef getPassName() const override { return "Errata workaround pass"; }; +}; + +char ErrataWorkaround::ID = 0; + +bool ErrataWorkaround::moveNext(MachineBasicBlock::iterator &I) { + + MachineBasicBlock *MBB = I->getParent(); + + do { + I++; + + while (I == MBB->end()) { + if (MBB->getFallThrough() == nullptr) + return false; + MBB = MBB->getFallThrough(); + I = MBB->begin(); + } + } while (I->isMetaInstruction() || I->isInlineAsm()); + + return true; +} + +void ErrataWorkaround::insertNop(MachineBasicBlock::iterator I) { + BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(SP::NOP)); +} + +bool ErrataWorkaround::isFloat(MachineBasicBlock::iterator I) { + if (I->getNumOperands() == 0) + return false; + + if (!I->getOperand(0).isReg()) + return false; + + unsigned reg = I->getOperand(0).getReg(); + + if (!SP::FPRegsRegClass.contains(reg) && !SP::DFPRegsRegClass.contains(reg)) + return false; + + return true; +} + +bool 
ErrataWorkaround::isDivSqrt(MachineBasicBlock::iterator I) { + switch (I->getOpcode()) { + case SP::FDIVS: + case SP::FDIVD: + case SP::FSQRTS: + case SP::FSQRTD: + return true; + } + return false; +} + +// Prevents the following code sequence from being generated: +// (stb/sth/st/stf) -> (single non-load, non-store instruction) -> (any store) +// If the sequence is detected, a NOP instruction is inserted after +// the first store instruction. +bool ErrataWorkaround::checkSeqTN0009A(MachineBasicBlock::iterator I) { + switch (I->getOpcode()) { + case SP::STrr: + case SP::STri: + case SP::STBrr: + case SP::STBri: + case SP::STHrr: + case SP::STHri: + case SP::STFrr: + case SP::STFri: + break; + default: + return false; + } + + MachineBasicBlock::iterator MI = I; + if (!moveNext(MI)) + return false; + + if (MI->mayStore() || MI->mayLoad()) + return false; + + MachineBasicBlock::iterator PatchHere = MI; + + if (!moveNext(MI)) + return false; + + if (!MI->mayStore()) + return false; + + insertNop(PatchHere); + return true; +} + +// Prevents the following code sequence from being generated: +// (std/stdf) -> (any store) +// If the sequence is detected, a NOP instruction is inserted after +// the first store instruction. +bool ErrataWorkaround::checkSeqTN0009B(MachineBasicBlock::iterator I) { + + switch (I->getOpcode()) { + case SP::STDrr: + case SP::STDri: + case SP::STDFrr: + case SP::STDFri: + break; + default: + return false; + } + + MachineBasicBlock::iterator MI = I; + + if (!moveNext(MI)) + return false; + + if (!MI->mayStore()) + return false; + + insertNop(MI); + return true; +} + +// Inserts a NOP instruction at the target of an integer branch if the +// target is a floating-point instruction or floating-point branch.
+bool ErrataWorkaround::checkSeqTN0012(MachineBasicBlock::iterator I) { + + if (I->getOpcode() != SP::BCOND) + return false; + + MachineBasicBlock *TargetMBB = I->getOperand(0).getMBB(); + MachineBasicBlock::iterator MI = TargetMBB->begin(); + + while (MI != TargetMBB->end() && MI->isMetaInstruction()) + MI++; + + if (MI == TargetMBB->end()) + return false; + + if (!isFloat(MI) && MI->getOpcode() != SP::FBCOND) + return false; + + insertNop(MI); + return true; +} + +// Prevents the following code sequence from being generated: +// (div/sqrt) -> (2 to 3 floating-point operations or loads) -> (div/sqrt) +// If the sequence is detected, one or two NOP instructions are inserted after +// the first div/sqrt instruction. No NOPs are inserted if one of the floating- +// point instructions in the middle of the sequence is a (div/sqrt), or if +// they have a dependency on the destination register of the first (div/sqrt). +// +// The function also prevents the following code sequence from being generated, +// (div/sqrt) -> (branch), by inserting a NOP instruction after the (div/sqrt).
+bool ErrataWorkaround::checkSeqTN0013(MachineBasicBlock::iterator I) { + + if (!isDivSqrt(I)) + return false; + + unsigned dstReg = I->getOperand(0).getReg(); + + MachineBasicBlock::iterator MI = I; + if (!moveNext(MI)) + return false; + + if (MI->isBranch()) { + insertNop(MI); + return true; + } + + MachineBasicBlock::iterator PatchHere = MI; + + unsigned fpFound = 0; + for (unsigned i = 0; i < 4; i++) { + + if (!isFloat(MI)) { + if (!moveNext(MI)) + return false; + continue; + } + + if (MI->readsRegister(dstReg)) + return false; + + if (isDivSqrt(MI)) { + if (i < 2) + return false; + if (fpFound < 2) + return false; + + insertNop(PatchHere); + if (i == 2) + insertNop(PatchHere); + return true; + } + + fpFound++; + if (!moveNext(MI)) + return false; + } + + return false; +} + +bool ErrataWorkaround::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + Subtarget = &MF.getSubtarget<SparcSubtarget>(); + TII = Subtarget->getInstrInfo(); + + for (auto &MBB : MF) { + for (auto &I : MBB) { + if (Subtarget->fixTN0009()) { + Changed |= checkSeqTN0009A(I); + Changed |= checkSeqTN0009B(I); + } + if (Subtarget->fixTN0012()) + Changed |= checkSeqTN0012(I); + if (Subtarget->fixTN0013()) + Changed |= checkSeqTN0013(I); + } + } + return Changed; +} + +INITIALIZE_PASS(ErrataWorkaround, "errata-workaround", "Errata workaround pass", + false, false) + +namespace llvm { +FunctionPass *createErrataWorkaroundPass() { return new ErrataWorkaround(); } +} // namespace llvm + LEONMachineFunctionPass::LEONMachineFunctionPass(char &ID) : MachineFunctionPass(ID) {} Index: lib/Target/Sparc/Sparc.h =================================================================== --- lib/Target/Sparc/Sparc.h +++ lib/Target/Sparc/Sparc.h @@ -29,6 +29,10 @@ FunctionPass *createSparcISelDag(SparcTargetMachine &TM); FunctionPass *createSparcDelaySlotFillerPass(); + FunctionPass *createErrataWorkaroundPass(); + + void initializeFillerPass(PassRegistry &); + void initializeErrataWorkaroundPass(PassRegistry &);
void LowerSparcMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, Index: lib/Target/Sparc/Sparc.td =================================================================== --- lib/Target/Sparc/Sparc.td +++ lib/Target/Sparc/Sparc.td @@ -148,9 +148,8 @@ [FeatureLeon, InsertNOPLoad, FeatureNoFSMULD, FeatureNoFMULS, FixAllFDIVSQRT]>; // LEON3 FT (GR712RC). Provides features for the GR712RC processor. -// - covers all the erratum fixed for LEON3 and support for the CASA instruction. def : Processor<"gr712rc", LEON3Itineraries, - [FeatureLeon, LeonCASA]>; + [FeatureLeon, LeonCASA, FixTN0009, FixTN0011, FixTN0012, FixTN0013]>; // LEON 4 FT generic def : Processor<"leon4", LEON4Itineraries, Index: lib/Target/Sparc/SparcAsmPrinter.cpp =================================================================== --- lib/Target/Sparc/SparcAsmPrinter.cpp +++ lib/Target/Sparc/SparcAsmPrinter.cpp @@ -259,6 +259,12 @@ case TargetOpcode::DBG_VALUE: // FIXME: Debug Value. return; + case SP::CASAasi10: + case SP::SWAPrr: + case SP::SWAPri: + if (MF->getSubtarget<SparcSubtarget>().fixTN0011()) + OutStreamer->EmitCodeAlignment(16); + break; case SP::GETPCX: LowerGETPCXAndEmitMCInsts(MI, getSubtargetInfo()); return; Index: lib/Target/Sparc/SparcSubtarget.h =================================================================== --- lib/Target/Sparc/SparcSubtarget.h +++ lib/Target/Sparc/SparcSubtarget.h @@ -50,6 +50,10 @@ bool InsertNOPLoad; bool FixAllFDIVSQRT; bool DetectRoundChange; + bool FixTN0009; + bool FixTN0011; + bool FixTN0012; + bool FixTN0013; SparcInstrInfo InstrInfo; SparcTargetLowering TLInfo; @@ -95,6 +99,10 @@ bool insertNOPLoad() const { return InsertNOPLoad; } bool fixAllFDIVSQRT() const { return FixAllFDIVSQRT; } bool detectRoundChange() const { return DetectRoundChange; } + bool fixTN0009() const { return FixTN0009; } + bool fixTN0011() const { return FixTN0011; } + bool fixTN0012() const { return FixTN0012; } + bool fixTN0013() const { return FixTN0013; } /// ParseSubtargetFeatures - Parses 
features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. Index: lib/Target/Sparc/SparcSubtarget.cpp =================================================================== --- lib/Target/Sparc/SparcSubtarget.cpp +++ lib/Target/Sparc/SparcSubtarget.cpp @@ -47,6 +47,10 @@ InsertNOPLoad = false; FixAllFDIVSQRT = false; DetectRoundChange = false; + FixTN0009 = false; + FixTN0011 = false; + FixTN0012 = false; + FixTN0013 = false; // Determine default and user specified characteristics std::string CPUName = CPU; Index: lib/Target/Sparc/SparcTargetMachine.cpp =================================================================== --- lib/Target/Sparc/SparcTargetMachine.cpp +++ lib/Target/Sparc/SparcTargetMachine.cpp @@ -25,6 +25,10 @@ RegisterTargetMachine<SparcV8TargetMachine> X(getTheSparcTarget()); RegisterTargetMachine<SparcV9TargetMachine> Y(getTheSparcV9Target()); RegisterTargetMachine<SparcelTargetMachine> Z(getTheSparcelTarget()); + + auto &PR = *PassRegistry::getPassRegistry(); + initializeFillerPass(PR); + initializeErrataWorkaroundPass(PR); } static std::string computeDataLayout(const Triple &T, bool is64Bit) { @@ -166,7 +170,9 @@ return false; } -void SparcPassConfig::addPreEmitPass(){ +void SparcPassConfig::addPreEmitPass() { + const SparcSubtarget *ST = getSparcTargetMachine().getSubtargetImpl(); + addPass(createSparcDelaySlotFillerPass()); if (this->getSparcTargetMachine().getSubtargetImpl()->insertNOPLoad()) @@ -180,6 +186,9 @@ { addPass(new FixAllFDIVSQRT()); } + + if (ST->fixTN0009() || ST->fixTN0011() || ST->fixTN0012() || ST->fixTN0013()) + addPass(createErrataWorkaroundPass()); } void SparcV8TargetMachine::anchor() { } Index: test/CodeGen/SPARC/tn0009.mir =================================================================== --- /dev/null +++ test/CodeGen/SPARC/tn0009.mir @@ -0,0 +1,26 @@ +# RUN: llc %s -mattr=+fix-tn0009 -march=sparc -run-pass=errata-workaround -o - \ +# RUN: | FileCheck %s + +--- +# CHECK: STrr $o1, $g0, $o0 +# CHECK-NEXT: NOP +# CHECK-NEXT: NOP +# 
CHECK-NEXT: STrr $o1, $g0, $o0 +name: seqa +body: | + bb.0.entry: + STrr $o1, $g0, $o0 + NOP + STrr $o1, $g0, $o0 + +--- +# CHECK: STDrr $i0, $g0, $i0_i1 +# CHECK-NEXT: NOP +# CHECK-NEXT: STDrr $i0, $g0, $i0_i1 +name: seqb +body: | + bb.0.entry: + STDrr $i0, $g0, $i0_i1 + STDrr $i0, $g0, $i0_i1 + +... Index: test/CodeGen/SPARC/tn0011.ll =================================================================== --- /dev/null +++ test/CodeGen/SPARC/tn0011.ll @@ -0,0 +1,9 @@ +; RUN: llc < %s -mcpu=gr712rc -mtriple=sparc -mattr=fix-tn0011 | FileCheck %s + +; CHECK: .p2align 4 +; CHECK-NEXT: casa +define i32 @test_atomic(i32* %p, i32 %v) { +entry: + %0 = atomicrmw nand i32* %p, i32 %v seq_cst + ret i32 %0 +} Index: test/CodeGen/SPARC/tn0012.mir =================================================================== --- /dev/null +++ test/CodeGen/SPARC/tn0012.mir @@ -0,0 +1,65 @@ +# RUN: llc %s -mattr=+fix-tn0012 -march=sparc -run-pass=errata-workaround -o - \ +# RUN: | FileCheck %s + +--- +# CHECK: bb.2: +# CHECK: NOP +# CHECK-NEXT: $f0 = FADDS $f0, $f0 +name: er-5-1 +body: | + bb.0.entry: + successors: %bb.1, %bb.2 + BCOND %bb.2, 10, implicit $icc { + NOP + } + + bb.1.entry: + successors: %bb.2 + NOP + + bb.2: + $f0 = FADDS $f0, $f0 + +--- +# CHECK: bb.2: +# CHECK: NOP +# CHECK-NEXT: FBCOND %bb.3, 22, implicit $fcc0 { +name: er-5-2 +body: | + bb.0.entry: + successors: %bb.2, %bb.1 + BCOND %bb.1, 10, implicit $icc { + NOP + } + + bb.2.entry: + successors: %bb.1 + NOP + + bb.1: + successors: %bb.3 + FBCOND %bb.3, 22, implicit $fcc0 { + NOP + } + bb.3: + NOP + +--- +# CHECK: bb.2: +# CHECK: NOP +# CHECK-NEXT: FCMPS $f0, $f0, implicit-def $fcc0 +name: er-5-3 +body: | + bb.0.entry: + successors: %bb.1, %bb.2 + BCOND %bb.2, 10, implicit $icc { + NOP + } + + bb.1.entry: + successors: %bb.2 + NOP + + bb.2: + FCMPS $f0, $f0, implicit-def $fcc0 +...
Index: test/CodeGen/SPARC/tn0013.mir =================================================================== --- /dev/null +++ test/CodeGen/SPARC/tn0013.mir @@ -0,0 +1,93 @@ +# RUN: llc %s -mattr=+fix-tn0013 -march=sparc -run-pass=errata-workaround -o - \ +# RUN: | FileCheck %s + +--- +# CHECK: $f0 = FSQRTS $f0 +# CHECK-NEXT: NOP +# CHECK-NEXT: NOP +# CHECK-NEXT: $f3 = FADDS $f1, $f2 +# CHECK-NEXT: $f3 = FADDS $f1, $f2 +# CHECK-NEXT: $f0 = FDIVS $f4, $f5 +name: er-8-1 +body: | + bb.0.entry: + $f0 = FSQRTS $f0 + $f3 = FADDS $f1, $f2 + $f3 = FADDS $f1, $f2 + $f0 = FDIVS $f4, $f5 + +--- +# CHECK: $f0 = FDIVS $f0, $f0 +# CHECK-NEXT: NOP +# CHECK-NEXT: $f3 = FADDS $f1, $f2 +# CHECK-NEXT: $f3 = FADDS $f1, $f2 +# CHECK-NEXT: $f3 = FADDS $f1, $f2 +# CHECK-NEXT: $f0 = FSQRTS $f4 +name: er-8-2 +body: | + bb.0.entry: + $f0 = FDIVS $f0, $f0 + $f3 = FADDS $f1, $f2 + $f3 = FADDS $f1, $f2 + $f3 = FADDS $f1, $f2 + $f0 = FSQRTS $f4 + +--- +# CHECK-NOT: NOP +name: er-9 +body: | + bb.0.entry: + $f0 = FSQRTS $f0 + $f3 = FADDS $f1, $f2 + $f3 = FADDS $f0, $f2 + $f3 = FADDS $f1, $f2 + $f0 = FSQRTS $f0 + +--- +# CHECK-NOT: NOP +name: er-10 +body: | + bb.0.entry: + $f0 = FSQRTS $f0 + $f4 = FSQRTS $f4 + $f3 = FADDS $f1, $f2 + $f0 = FSQRTS $f0 + +--- +# CHECK: er-11 +# CHECK: $f0 = FSQRTS $f0 +# CHECK-NEXT: NOP +# CHECK: $f0 = FDIVS $f0, $f0 +# CHECK-NEXT: NOP +name: er-11 +body: | + bb.0.entry: + successors: %bb.3, %bb.1 + $f0 = FSQRTS $f0 + FBCOND %bb.3, 22, implicit $fcc0 { + NOP + } + bb.1: + successors: %bb.3, %bb.4 + $f0 = FDIVS $f0, $f0 + BCOND %bb.4, 10, implicit $icc { + NOP + } + bb.3: + NOP + bb.4: + NOP + +--- +# CHECK: $f1 = FDIVS $f0, $f1 +# CHECK-NEXT: NOP +# CHECK-NEXT: STri $i6, -84, $i2 +name: er-8-3 +body: | + bb.0.entry: + $f1 = FDIVS $f0, $f1 + STri $i6, -84, $i2 + $f0 = LDFri $i6, -84 + $f0 = FITOS $f0 + $f5 = FDIVS $f4, $f0 +...