Index: lib/Target/Sparc/LeonPasses.cpp
===================================================================
--- lib/Target/Sparc/LeonPasses.cpp
+++ lib/Target/Sparc/LeonPasses.cpp
@@ -313,11 +313,59 @@
       // already have been converted to FSQRTD or FDIVD earlier in the
       // pipeline.
       if (Opcode == SP::FSQRTD || Opcode == SP::FDIVD) {
-        for (int InsertedCount = 0; InsertedCount < 5; InsertedCount++)
+        // "Delay slot after unused modified register" optimization.
+        //
+        // The FixAllFDIVSQRT pass pads FSQRTD/FDIVD with 5 NOPs before and
+        // 28 NOPs after. The padding shrinks to 2 NOPs before when the
+        // previous instruction mentions none of the source registers, and
+        // to 14 NOPs after when the next instruction does not mention the
+        // destination register.
+        //
+        // MachineInstr operands list the definition first, so operand 0 is
+        // rd even though the assembly syntax ("fdivd rs1, rs2, rd") prints
+        // it last; the sources are the register uses that follow it.
+        //
+        // NOTE(review): registers are compared by exact number, so a
+        // sub-/super-register alias is not treated as an overlap. Confirm
+        // whether TargetRegisterInfo::regsOverlap is needed here.
+        auto MentionsReg = [](const MachineInstr &Inst, unsigned Reg) -> bool {
+          for (const MachineOperand &MO : Inst.operands())
+            if (MO.isReg() && MO.getReg() == Reg)
+              return true;
+          return false;
+        };
+
+        const unsigned DestReg = MI.getOperand(0).getReg();
+
+        // With no earlier instruction in this block nothing is known about
+        // the sources (and std::prev(MBB.begin()) is invalid), so keep the
+        // full padding in that case.
+        bool SourcesUntouched = MBBI != MBB.begin();
+        if (SourcesUntouched) {
+          const MachineInstr &PrevMI = *std::prev(MBBI);
+          for (const MachineOperand &Src : MI.uses())
+            if (Src.isReg() && Src.isUse() &&
+                MentionsReg(PrevMI, Src.getReg()))
+              SourcesUntouched = false;
+        }
+
+        // Insert 5 NOPs (2 when optimized) before FSQRTD/FDIVD ...
+        const int NumNOPsBefore = SourcesUntouched ? 2 : 5;
+        for (int InsertedCount = 0; InsertedCount < NumNOPsBefore;
+             InsertedCount++)
           BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
 
         MachineBasicBlock::iterator NMBBI = std::next(MBBI);
-        for (int InsertedCount = 0; InsertedCount < 28; InsertedCount++)
+
+        // At the end of the block there is no next instruction to inspect
+        // (MBB.end() must not be dereferenced), so keep all 28 NOPs.
+        const bool DestUnused =
+            NMBBI != MBB.end() && !MentionsReg(*NMBBI, DestReg);
+
+        // ... and 28 NOPs (14 when optimized) after it.
+        const int NumNOPsAfter = DestUnused ? 14 : 28;
+        for (int InsertedCount = 0; InsertedCount < NumNOPsAfter;
+             InsertedCount++)
           BuildMI(MBB, NMBBI, DL, TII.get(SP::NOP));
 
         Modified = true;
Index: test/CodeGen/SPARC/LeonFixAllFDIVSQRTPassUT.ll
===================================================================
--- test/CodeGen/SPARC/LeonFixAllFDIVSQRTPassUT.ll
+++ test/CodeGen/SPARC/LeonFixAllFDIVSQRTPassUT.ll
@@ -0,0 +1,59 @@
+; RUN: llc %s -O0 -march=sparc -mcpu=leon3 -mattr=+fixallfdivsqrt -o - | FileCheck %s
+; RUN: llc %s -O0 -march=sparc -mcpu=ut699 -o - | FileCheck %s
+
+; CHECK-LABEL: test_1
+; CHECK: nop
+; CHECK: nop
+; CHECK: fdivd
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+define double @test_1(double* byval %a, double* byval %b) {
+entry:
+  %0 = load double, double* %a, align 8
+  %1 = load double, double* %b, align 8
+  %res = fdiv double %0, %1
+  ret double %res
+}
+
+declare double @llvm.sqrt.f64(double) nounwind readonly
+
+; CHECK-LABEL: test_2
+; CHECK: nop
+; CHECK: nop
+; CHECK: nop
+; CHECK: nop
+; CHECK: nop
+; CHECK: fsqrtd
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+define double @test_2(double* byval %a) {
+entry:
+  %0 = load double, double* %a, align 8
+  %1 = call double @llvm.sqrt.f64(double %0) nounwind
+  ret double %1
+}
+