Index: lib/Target/Sparc/LeonFloatOptimisations.cpp
===================================================================
--- lib/Target/Sparc/LeonFloatOptimisations.cpp
+++ lib/Target/Sparc/LeonFloatOptimisations.cpp
@@ -0,0 +1,192 @@
+//===------ LeonFloatOptimisations.cpp - Optimisations specific to LEON ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Opt-in machine function pass that replaces the double precision FDIVD and
+// FSQRTD instructions with FDTOS conversions followed by FDIVS and FSQRTS.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LeonPasses.h"
+#include "Sparc.h"
+#include "SparcSubtarget.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "leon-float-optimizer"
+
+STATISTIC(OptimizedLeonFloatInstructions,
+          "Number of LEON float instructions optimized");
+
+// Off by default: the rewrite trades accuracy for speed.
+static cl::opt<bool>
+    EnableLeonFloatOptimizer("enable-leon-float-optimizer", cl::init(false),
+                             cl::desc("Enable the LEON float optimizer."),
+                             cl::Hidden);
+
+namespace {
+/// Machine function pass that narrows FDIVD/FSQRTD to FDIVS/FSQRTS.
+struct ConvertDoubleFPToSingleFPInstr : public LEONMachineFunctionPass {
+  const SparcSubtarget *Subtarget;
+
+  static char ID;
+  ConvertDoubleFPToSingleFPInstr() : LEONMachineFunctionPass(ID) {}
+
+  const char *getPassName() const override {
+    return "LEON Float Instruction Optimizer";
+  }
+
+  bool runOnMachineFunction(MachineFunction &F) override;
+};
+char ConvertDoubleFPToSingleFPInstr::ID = 0;
+} // end of anonymous namespace
+
+/// Creates the pass. The target machine argument is not used.
+FunctionPass *llvm::createLeonFloatOptimizerPass(TargetMachine &tm) {
+  return new ConvertDoubleFPToSingleFPInstr;
+}
+
+//****************************************************************************************************************
+//**** ConvertDoubleFPToSingleFPInstr optimization pass
+//****************************************************************************************************************
+// This pass converts FDIVD to FDIVS and FSQRTD to FSQRTS
+//
+
+/// Replaces every FDIVD/FSQRTD in \p MF with FDTOS conversions into scratch
+/// registers followed by FDIVS/FSQRTS.
+///
+/// Returns false immediately unless -enable-leon-float-optimizer was given.
+/// An instruction is left untouched, and a message is written to errs(), when
+/// getUnusedFPRegister() reports that no scratch register is available.
+///
+/// \returns true if at least one instruction was replaced.
+bool ConvertDoubleFPToSingleFPInstr::runOnMachineFunction(MachineFunction &MF) {
+  if (!EnableLeonFloatOptimizer) {
+    return false;
+  }
+
+  Subtarget = &MF.getSubtarget<SparcSubtarget>();
+  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
+  DebugLoc DL = DebugLoc();
+
+  const int UNASSIGNED_INDEX = -1;
+  const char *const RemarkMsg =
+      "Optimized slow LEON double precision floating "
+      "point instruction to single precision "
+      "floating point instruction. Be aware that "
+      "this trades off accuracy against speed.";
+
+  bool Modified = false;
+  for (MachineBasicBlock &MBB : MF) {
+    for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E;) {
+      // Advance before MI is examined. MI may be erased below, and stepping
+      // the iterator again after an erase would skip MI's successor or run
+      // past the end of the block.
+      MachineBasicBlock::iterator InsertPt = MBBI++;
+      MachineInstr &MI = *InsertPt;
+      unsigned Opcode = MI.getOpcode();
+
+      // NOTE(review): the register comments below use assembly operand order.
+      // MachineInstr operands usually list the def first - confirm that
+      // operand 0 really is a source here.
+      if (Opcode == SP::FDIVD && MI.getNumOperands() == 3) {
+        // take the registers from fdivd %f20,%f21,%f8
+        int Reg1Index = MI.getOperand(0).getReg();
+        int Reg2Index = MI.getOperand(1).getReg();
+        int Reg3Index = MI.getOperand(2).getReg();
+
+        clearUsedRegisterList();
+
+        // Reg3Index hasn't been used yet, so we need to reserve it.
+        markRegisterUsed(Reg3Index);
+        const int ScratchReg1Index = getUnusedFPRegister(MF.getRegInfo());
+        markRegisterUsed(ScratchReg1Index);
+        const int ScratchReg2Index = getUnusedFPRegister(MF.getRegInfo());
+        markRegisterUsed(ScratchReg2Index);
+
+        if (ScratchReg1Index == UNASSIGNED_INDEX ||
+            ScratchReg2Index == UNASSIGNED_INDEX) {
+          errs() << "Cannot allocate free scratch registers for the "
+                    "ConvertDoubleFPToSingleFPInstr pass.\n";
+        } else {
+          // create fdtos %f20,%f0
+          BuildMI(MBB, InsertPt, DL, TII.get(SP::FDTOS))
+              .addReg(ScratchReg1Index)
+              .addReg(Reg1Index);
+
+          // create fdtos %f21,%f2
+          BuildMI(MBB, InsertPt, DL, TII.get(SP::FDTOS))
+              .addReg(ScratchReg2Index)
+              .addReg(Reg2Index);
+
+          // create fdivs %f0,%f2,%f8
+          BuildMI(MBB, InsertPt, DL, TII.get(SP::FDIVS))
+              .addReg(Reg3Index)
+              .addReg(ScratchReg1Index)
+              .addReg(ScratchReg2Index);
+
+          // Report while MI and its DebugLoc are still alive, then erase it.
+          emitOptimizationRemark(MF.getFunction()->getContext(), getPassName(),
+                                 *MF.getFunction(), MI.getDebugLoc(),
+                                 RemarkMsg);
+          MI.eraseFromParent();
+
+          ++OptimizedLeonFloatInstructions;
+          Modified = true;
+        }
+      } else if (Opcode == SP::FSQRTD && MI.getNumOperands() == 2) {
+        // take the registers from fsqrtd %f20,%f21
+        int Reg1Index = MI.getOperand(0).getReg();
+        int Reg2Index = MI.getOperand(1).getReg();
+
+        clearUsedRegisterList();
+
+        // Reg2Index hasn't been used yet, so we need to reserve it.
+        markRegisterUsed(Reg2Index);
+        const int ScratchReg1Index = getUnusedFPRegister(MF.getRegInfo());
+        markRegisterUsed(ScratchReg1Index);
+
+        if (ScratchReg1Index == UNASSIGNED_INDEX) {
+          errs() << "Cannot allocate free scratch registers for the "
+                    "ConvertDoubleFPToSingleFPInstr pass.\n";
+        } else {
+          // create fdtos %f20,%f0
+          BuildMI(MBB, InsertPt, DL, TII.get(SP::FDTOS))
+              .addReg(ScratchReg1Index)
+              .addReg(Reg1Index);
+
+          // create fsqrts %f0,%f2
+          BuildMI(MBB, InsertPt, DL, TII.get(SP::FSQRTS))
+              .addReg(Reg2Index)
+              .addReg(ScratchReg1Index);
+
+          // Report while MI and its DebugLoc are still alive, then erase it.
+          emitOptimizationRemark(MF.getFunction()->getContext(), getPassName(),
+                                 *MF.getFunction(), MI.getDebugLoc(),
+                                 RemarkMsg);
+          MI.eraseFromParent();
+
+          ++OptimizedLeonFloatInstructions;
+          Modified = true;
+        }
+      }
+    }
+  }
+
+  return Modified;
+}
Index: lib/Target/Sparc/Sparc.h
===================================================================
--- lib/Target/Sparc/Sparc.h
+++ lib/Target/Sparc/Sparc.h
@@ -29,6 +29,7 @@
 
   FunctionPass *createSparcISelDag(SparcTargetMachine &TM);
   FunctionPass *createSparcDelaySlotFillerPass(TargetMachine &TM);
+  FunctionPass *createLeonFloatOptimizerPass(TargetMachine &TM);
 
   void LowerSparcMachineInstrToMCInst(const MachineInstr *MI,
                                       MCInst &OutMI,
Index: lib/Target/Sparc/SparcTargetMachine.cpp
===================================================================
--- lib/Target/Sparc/SparcTargetMachine.cpp
+++ lib/Target/Sparc/SparcTargetMachine.cpp
@@ -144,6 +144,7 @@
 
 void SparcPassConfig::addPreEmitPass() {
   addPass(createSparcDelaySlotFillerPass(getSparcTargetMachine()));
+  addPass(createLeonFloatOptimizerPass(getSparcTargetMachine()));
   if (this->getSparcTargetMachine().getSubtargetImpl()->ignoreZeroFlag()) {
     addPass(new IgnoreZeroFlag(getSparcTargetMachine()));
   }
Index: 
test/CodeGen/SPARC/LeonConvertDoubleFPToSingleFPInstrUT.ll
===================================================================
--- test/CodeGen/SPARC/LeonConvertDoubleFPToSingleFPInstrUT.ll
+++ test/CodeGen/SPARC/LeonConvertDoubleFPToSingleFPInstrUT.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -O0 -march=sparc -mcpu=leon2 -enable-leon-float-optimizer | FileCheck %s
+; RUN: llc < %s -O0 -march=sparc -mcpu=leon3 -enable-leon-float-optimizer | FileCheck %s
+; RUN: llc < %s -O0 -march=sparc -mcpu=leon4 -enable-leon-float-optimizer | FileCheck %s
+; RUN: llc < %s -O0 -march=sparc -mcpu=at697e -enable-leon-float-optimizer | FileCheck %s
+; RUN: llc < %s -O0 -march=sparc -mcpu=at697f -enable-leon-float-optimizer | FileCheck %s
+; RUN: llc < %s -O0 -march=sparc -mcpu=ut699 -enable-leon-float-optimizer | FileCheck %s
+; RUN: llc < %s -O0 -march=sparc -mcpu=gr712rc -enable-leon-float-optimizer | FileCheck %s
+; RUN: llc < %s -O0 -march=sparc -mcpu=gr740 -enable-leon-float-optimizer | FileCheck %s
+
+; Testing conversion of FDIVD to FDIVS: a double division must emit fdivs.
+; CHECK-LABEL: test_fdivd_conversion
+; CHECK: fdivs
+define double @test_fdivd_conversion(double* byval %a, double* byval %b) {
+entry:
+  %0 = load double, double* %a, align 8
+  %1 = load double, double* %b, align 8
+  %res = fdiv double %0, %1
+  ret double %res
+}
+
+declare double @llvm.sqrt.f64(double %f)
+
+; Testing conversion of FSQRTD to FSQRTS: a double sqrt must emit fsqrts.
+; CHECK-LABEL: test_fsqrtd_conversion
+; CHECK: fsqrts
+define void @test_fsqrtd_conversion(double *%ptr) {
+  %orig = load double , double *%ptr
+  %sqrt = call double @llvm.sqrt.f64(double %orig)
+  store double %sqrt, double *%ptr
+  ret void
+}
\ No newline at end of file