Index: llvm/trunk/lib/Target/Sparc/LeonFeatures.td =================================================================== --- llvm/trunk/lib/Target/Sparc/LeonFeatures.td +++ llvm/trunk/lib/Target/Sparc/LeonFeatures.td @@ -10,57 +10,82 @@ // //===----------------------------------------------------------------------===// - //===----------------------------------------------------------------------===// -// UMAC and SMAC support for LEON3 and LEON4 processors. +// CASA Support differs between LEON3-FT GR712RC and LEON3-FT UT699 +// We need to have the option to switch this on and off. //===----------------------------------------------------------------------===// -//support to casa instruction; for leon3 subtarget only -def UMACSMACSupport : SubtargetFeature< - "hasumacsmac", - "HasUmacSmac", - "true", - "Enable UMAC and SMAC for LEON3 and LEON4 processors" ->; - +// support to casa instruction; for leon3 subtarget only +def LeonCASA : SubtargetFeature< + "hasleoncasa", "HasLeonCasa", "true", + "Enable CASA instruction for LEON3 and LEON4 processors">; //===----------------------------------------------------------------------===// -// CASA Support differs between LEON3-FT GR712RC and LEON3-FT UT699 -// We need to have the option to switch this on and off. +// UMAC and SMAC support for LEON3 and LEON4 processors. 
//===----------------------------------------------------------------------===// -//support to casa instruction; for leon3 subtarget only -def LeonCASA : SubtargetFeature< - "hasleoncasa", - "HasLeonCasa", - "true", - "Enable CASA instruction for LEON3 and LEON4 processors" ->; - -def InsertNOPLoad: SubtargetFeature< - "insertnopload", - "InsertNOPLoad", - "true", - "LEON3 erratum fix: Insert a NOP instruction after every single-cycle load instruction when the next instruction is another load/store instruction" ->; - -def FixFSMULD : SubtargetFeature< - "fixfsmuld", - "FixFSMULD", - "true", - "LEON erratum fix: Do not use FSMULD" ->; - -def ReplaceFMULS : SubtargetFeature< - "replacefmuls", - "ReplaceFMULS", - "true", - "LEON erratum fix: Replace FMULS instruction with FMULD and relevant conversion instructions" ->; - -def FixAllFDIVSQRT : SubtargetFeature< - "fixallfdivsqrt", - "FixAllFDIVSQRT", - "true", - "LEON erratum fix: Fix FDIVS/FDIVD/FSQRTS/FSQRTD instructions with NOPs and floating-point store" ->; +// support to casa instruction; for leon3 subtarget only +def UMACSMACSupport + : SubtargetFeature<"hasumacsmac", "HasUmacSmac", "true", + "Enable UMAC and SMAC for LEON3 and LEON4 processors">; + +//===----------------------------------------------------------------------===// +// LEON Erratum fixes +//===----------------------------------------------------------------------===// + +def ReplaceSDIV + : SubtargetFeature< + "replacesdiv", "PerformSDIVReplace", "true", + "AT697E erratum fix: Do not emit SDIV, emit SDIVCC instead">; + +def FixCALL + : SubtargetFeature<"fixcall", "FixCallImmediates", "true", + "AT697E erratum fix: Restrict the size of the immediate " + "operand of the CALL instruction to 20 bits">; + +def IgnoreZeroFlag + : SubtargetFeature<"ignrzeroflag", "IgnoreZeroFlag", "true", + "AT697E erratum fix: Do not rely on the zero bit flag " + "on a divide overflow for SDIVCC and UDIVCC">; + +def InsertNOPDoublePrecision + : 
SubtargetFeature<"insrtnopdblprcsn", "InsertNOPDoublePrecision", "true", + "LEON2 erratum fix: Insert a NOP before the double " + "precision floating point instruction">; + +def FixFSMULD : SubtargetFeature<"fixfsmuld", "FixFSMULD", "true", + "LEON3 erratum fix: Do not select FSMULD">; + +def ReplaceFMULS + : SubtargetFeature<"replacefmuls", "ReplaceFMULS", "true", + "LEON3 erratum fix: Replace FMULS instruction with a " + "routine using conversions/double precision operations " + "to replace FMULS">; + +def PreventRoundChange + : SubtargetFeature<"prvntroundchange", "PreventRoundChange", "true", + "LEON3 erratum fix: Prevent any rounding mode change " + "request: use only the round-to-nearest rounding mode">; + +def FixAllFDIVSQRT + : SubtargetFeature<"fixallfdivsqrt", "FixAllFDIVSQRT", "true", + "LEON3 erratum fix: Fix FDIVS/FDIVD/FSQRTS/FSQRTD " + "instructions with NOPs and floating-point store">; + +def InsertNOPLoad + : SubtargetFeature<"insertnopload", "InsertNOPLoad", "true", + "LEON3 erratum fix: Insert a NOP instruction after " + "every single-cycle load instruction when the next " + "instruction is another load/store instruction">; + +def FlushCacheLineSWAP + : SubtargetFeature<"flshcachelineswap", "FlushCacheLineSWAP", "true", + "LEON3 erratum fix: Flush cache line containing the " + "lock before performing any of the atomic instructions " + "SWAP and LDSTUB">; + +def InsertNOPsLoadStore + : SubtargetFeature<"insertnopsloadstore", "InsertNOPsLoadStore", "true", + "LEON3 erratum fix: Insert NOPs between " + "single-precision loads and the store, so the number of " + "instructions between is 4">; Index: llvm/trunk/lib/Target/Sparc/LeonPasses.h =================================================================== --- llvm/trunk/lib/Target/Sparc/LeonPasses.h +++ llvm/trunk/lib/Target/Sparc/LeonPasses.h @@ -44,17 +44,57 @@ int getUnusedFPRegister(MachineRegisterInfo &MRI); }; -class LLVM_LIBRARY_VISIBILITY InsertNOPLoad : public LEONMachineFunctionPass { 
+class LLVM_LIBRARY_VISIBILITY ReplaceSDIV : public LEONMachineFunctionPass { public: static char ID; - InsertNOPLoad(TargetMachine &tm); + ReplaceSDIV(); + ReplaceSDIV(TargetMachine &tm); bool runOnMachineFunction(MachineFunction &MF) override; const char *getPassName() const override { - return "InsertNOPLoad: Erratum Fix LBR35: insert a NOP instruction after " - "every single-cycle load instruction when the next instruction is " - "another load/store instruction"; + return "ReplaceSDIV: Erratum Fix LBR25: do not emit SDIV, but emit SDIVCC " + "instead"; + } +}; + +class LLVM_LIBRARY_VISIBILITY FixCALL : public LEONMachineFunctionPass { +public: + static char ID; + + FixCALL(TargetMachine &tm); + bool runOnMachineFunction(MachineFunction &MF) override; + + const char *getPassName() const override { + return "FixCALL: Erratum Fix LBR26: restrict the size of the immediate " + "operand of the CALL instruction to 20 bits"; + } +}; + +class LLVM_LIBRARY_VISIBILITY IgnoreZeroFlag : public LEONMachineFunctionPass { +public: + static char ID; + + IgnoreZeroFlag(TargetMachine &tm); + bool runOnMachineFunction(MachineFunction &MF) override; + + const char *getPassName() const override { + return "IgnoreZeroFlag: Erratum Fix LBR28: do not rely on the zero bit " + "flag on a divide overflow for SDIVCC and UDIVCC"; + } +}; + +class LLVM_LIBRARY_VISIBILITY InsertNOPDoublePrecision + : public LEONMachineFunctionPass { +public: + static char ID; + + InsertNOPDoublePrecision(TargetMachine &tm); + bool runOnMachineFunction(MachineFunction &MF) override; + + const char *getPassName() const override { + return "InsertNOPDoublePrecision: Erratum Fix LBR30: insert a NOP before " + "the double precision floating point instruction"; } }; @@ -84,6 +124,20 @@ } }; +class LLVM_LIBRARY_VISIBILITY PreventRoundChange + : public LEONMachineFunctionPass { +public: + static char ID; + + PreventRoundChange(TargetMachine &tm); + bool runOnMachineFunction(MachineFunction &MF) override; + + const 
char *getPassName() const override { + return "PreventRoundChange: Erratum Fix LBR33: prevent any rounding mode " + "change request: use only the round-to-nearest rounding mode"; + } +}; + class LLVM_LIBRARY_VISIBILITY FixAllFDIVSQRT : public LEONMachineFunctionPass { public: static char ID; @@ -96,6 +150,50 @@ "instructions with NOPs and floating-point store"; } }; -} // namespace llvm + +class LLVM_LIBRARY_VISIBILITY InsertNOPLoad : public LEONMachineFunctionPass { +public: + static char ID; + + InsertNOPLoad(TargetMachine &tm); + bool runOnMachineFunction(MachineFunction &MF) override; + + const char *getPassName() const override { + return "InsertNOPLoad: insert a NOP instruction after " + "every single-cycle load instruction when the next instruction is " + "another load/store instruction"; + } +}; + +class LLVM_LIBRARY_VISIBILITY FlushCacheLineSWAP + : public LEONMachineFunctionPass { +public: + static char ID; + + FlushCacheLineSWAP(TargetMachine &tm); + bool runOnMachineFunction(MachineFunction &MF) override; + + const char *getPassName() const override { + return "FlushCacheLineSWAP: Erratum Fix LBR36: flush cache line containing " + "the lock before performing any of the atomic instructions SWAP and " + "LDSTUB"; + } +}; + +class LLVM_LIBRARY_VISIBILITY InsertNOPsLoadStore + : public LEONMachineFunctionPass { +public: + static char ID; + + InsertNOPsLoadStore(TargetMachine &tm); + bool runOnMachineFunction(MachineFunction &MF) override; + + const char *getPassName() const override { + return "InsertNOPsLoadStore: Erratum Fix LBR37: insert NOPs between " + "single-precision loads and the store, so the number of " + "instructions between is 4"; + } +}; +} // namespace llvm #endif // LLVM_LIB_TARGET_SPARC_LEON_PASSES_H Index: llvm/trunk/lib/Target/Sparc/LeonPasses.cpp =================================================================== --- llvm/trunk/lib/Target/Sparc/LeonPasses.cpp +++ llvm/trunk/lib/Target/Sparc/LeonPasses.cpp @@ -149,13 +149,10 @@ 
Reg2Index = MI.getOperand(1).getReg(); Reg3Index = MI.getOperand(2).getReg(); } else if (MI.isInlineAsm()) { - std::string AsmString( - MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName()); - std::string FMULSOpCoode("fsmuld"); - std::transform(AsmString.begin(), AsmString.end(), AsmString.begin(), - ::tolower); - if (AsmString.find(FMULSOpCoode) == - 0) { // this is an inline FSMULD instruction + StringRef AsmString = + MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName(); + if (AsmString.startswith_lower("fsmuld")) { + // this is an inline FSMULD instruction unsigned StartOp = InlineAsm::MIOp_FirstOperand; @@ -264,13 +261,10 @@ Reg2Index = MI.getOperand(1).getReg(); Reg3Index = MI.getOperand(2).getReg(); } else if (MI.isInlineAsm()) { - std::string AsmString( - MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName()); - std::string FMULSOpCoode("fmuls"); - std::transform(AsmString.begin(), AsmString.end(), AsmString.begin(), - ::tolower); - if (AsmString.find(FMULSOpCoode) == - 0) { // this is an inline FMULS instruction + StringRef AsmString = + MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName(); + if (AsmString.startswith_lower("fmuls")) { + // this is an inline FMULS instruction unsigned StartOp = InlineAsm::MIOp_FirstOperand; // extracts the registers from the inline assembly instruction @@ -370,17 +364,13 @@ unsigned Opcode = MI.getOpcode(); if (MI.isInlineAsm()) { - std::string AsmString( - MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName()); - std::string FSQRTDOpCode("fsqrtd"); - std::string FDIVDOpCode("fdivd"); - std::transform(AsmString.begin(), AsmString.end(), AsmString.begin(), - ::tolower); - if (AsmString.find(FSQRTDOpCode) == - 0) { // this is an inline fsqrts instruction + StringRef AsmString = + MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName(); + if (AsmString.startswith_lower("fsqrtd")) { + // this is an inline fsqrts instruction Opcode = SP::FSQRTD; - } else if (AsmString.find(FDIVDOpCode) == - 0) { // this is an 
inline fsqrts instruction + } else if (AsmString.startswith_lower("fdivd")) { + // this is an inline fdivd instruction Opcode = SP::FDIVD; } } @@ -390,10 +380,12 @@ // already have been converted to FSQRTD or FDIVD earlier in the // pipeline. if (Opcode == SP::FSQRTD || Opcode == SP::FDIVD) { + // Insert 5 NOPs before FSQRTD,FDIVD. for (int InsertedCount = 0; InsertedCount < 5; InsertedCount++) BuildMI(MBB, MBBI, DL, TII.get(SP::NOP)); MachineBasicBlock::iterator NMBBI = std::next(MBBI); + // ... and inserting 28 NOPs after FSQRTD,FDIVD. for (int InsertedCount = 0; InsertedCount < 28; InsertedCount++) BuildMI(MBB, NMBBI, DL, TII.get(SP::NOP)); @@ -404,3 +396,538 @@ return Modified; } + +//***************************************************************************** +//**** ReplaceSDIV pass +//***************************************************************************** +// This pass fixes the incorrectly working SDIV instruction that +// exists for some earlier versions of the LEON processor line. The instruction +// is replaced with an SDIVcc instruction instead, which is working. 
+//
+char ReplaceSDIV::ID = 0;
+
+ReplaceSDIV::ReplaceSDIV() : LEONMachineFunctionPass(ID) {}
+
+ReplaceSDIV::ReplaceSDIV(TargetMachine &tm) : LEONMachineFunctionPass(tm, ID) {}
+
+// Walks every instruction of MF and retargets SDIVrr/SDIVri to the matching
+// SDIVCCrr/SDIVCCri descriptor in place; the operand list is not touched.
+// Returns true when at least one instruction was rewritten.
+bool ReplaceSDIV::runOnMachineFunction(MachineFunction &MF) {
+  Subtarget = &MF.getSubtarget<SparcSubtarget>();
+  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
+
+  bool Modified = false;
+  for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
+    MachineBasicBlock &MBB = *MFI;
+    for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
+      MachineInstr &MI = *MBBI;
+      switch (MI.getOpcode()) {
+      case SP::SDIVrr:
+        MI.setDesc(TII.get(SP::SDIVCCrr));
+        Modified = true;
+        break;
+      case SP::SDIVri:
+        MI.setDesc(TII.get(SP::SDIVCCri));
+        Modified = true;
+        break;
+      default:
+        break;
+      }
+    }
+  }
+
+  return Modified;
+}
+
+// Registers the pass under the "replace-sdiv" command-line name.
+static RegisterPass<ReplaceSDIV> X("replace-sdiv", "Replace SDIV Pass", false,
+                                   false);
+
+//*****************************************************************************
+//**** FixCALL pass
+//*****************************************************************************
+// This pass restricts the size of the immediate operand of the CALL
+// instruction, which can cause problems on some earlier versions of the LEON
+// processor, which can interpret some of the call address bits incorrectly.
+//
+char FixCALL::ID = 0;
+
+FixCALL::FixCALL(TargetMachine &tm) : LEONMachineFunctionPass(tm, ID) {}
+
+// Truncates immediate operands of call instructions to their low 20 bits.
+//  - For SP::CALL / SP::CALLrr only the first immediate operand is masked.
+//  - For inline assembly whose string starts with "call" (case-insensitive),
+//    every immediate operand from MIOp_FirstOperand onwards is masked.
+// Returns true when at least one operand was rewritten.
+bool FixCALL::runOnMachineFunction(MachineFunction &MF) {
+  bool Modified = false;
+
+  for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
+    MachineBasicBlock &MBB = *MFI;
+    for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
+      MachineInstr &MI = *MBBI;
+
+      unsigned Opcode = MI.getOpcode();
+      if (Opcode == SP::CALL || Opcode == SP::CALLrr) {
+        unsigned NumOperands = MI.getNumOperands();
+        for (unsigned OperandIndex = 0; OperandIndex < NumOperands;
+             OperandIndex++) {
+          MachineOperand &MO = MI.getOperand(OperandIndex);
+          if (MO.isImm()) {
+            int64_t Value = MO.getImm();
+            MO.setImm(Value & 0x000fffffL);
+            Modified = true;
+            break; // only the first immediate is the call target
+          }
+        }
+      } else if (MI.isInlineAsm()) {
+        // inline assembly immediate call
+        StringRef AsmString =
+            MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName();
+        if (AsmString.startswith_lower("call")) {
+          // this is an inline call instruction
+          unsigned StartOp = InlineAsm::MIOp_FirstOperand;
+
+          // Mask every immediate operand of the inline assembly instruction.
+          // NOTE(review): inline-asm operand groups are introduced by an
+          // immediate flag word; this loop does not tell those apart from
+          // user immediates - confirm that masking them is harmless.
+          for (unsigned i = StartOp, e = MI.getNumOperands(); i != e; ++i) {
+            MachineOperand &MO = MI.getOperand(i);
+            if (MO.isImm()) {
+              int64_t Value = MO.getImm();
+              MO.setImm(Value & 0x000fffffL);
+              Modified = true;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  return Modified;
+}
+
+//*****************************************************************************
+//**** IgnoreZeroFlag pass
+//*****************************************************************************
+// This erratum fix fixes the overflow behavior of SDIVCC and UDIVCC
+// instructions that exists on some earlier LEON processors. Where these
+// instructions are detected, they are replaced by a sequence that will
+// explicitly write the overflow bit flag if this is required.
+//
+char IgnoreZeroFlag::ID = 0;
+
+IgnoreZeroFlag::IgnoreZeroFlag(TargetMachine &tm)
+    : LEONMachineFunctionPass(tm, ID) {}
+
+// After every SDIVCC/UDIVCC (real opcode, or inline assembly whose string
+// starts with "sdivcc"/"udivcc", case-insensitive) the containing block is
+// split and a fix-up sequence is appended to the first half:
+//   two conditional branches to the second half, WRPSR %g0, 1 and a NOP.
+// Returns true when at least one such sequence was emitted.
+bool IgnoreZeroFlag::runOnMachineFunction(MachineFunction &MF) {
+  Subtarget = &MF.getSubtarget<SparcSubtarget>();
+  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
+  DebugLoc DL = DebugLoc();
+
+  // Splits MBB just after the instruction at MBBI and emits the fix-up
+  // sequence at the end of the (now shortened) MBB. Shared by the opcode and
+  // the inline-assembly paths, which perform exactly the same rewrite.
+  auto SplitAndEmitFixup = [&](MachineBasicBlock &MBB,
+                               MachineBasicBlock::iterator MBBI) {
+    // split the current machine basic block - just after the sdivcc/udivcc
+    // instruction
+    // create a label that help us skip the zero flag update (of PSR -
+    // Processor Status Register)
+    // if conditions are not met
+    const BasicBlock *LLVM_BB = MBB.getBasicBlock();
+    MachineFunction::iterator It = std::next(MachineFunction::iterator(MBB));
+
+    MachineBasicBlock *dneBB = MF.CreateMachineBasicBlock(LLVM_BB);
+    MF.insert(It, dneBB);
+
+    // Transfer the remainder of MBB and its successor edges to dneBB.
+    dneBB->splice(dneBB->begin(), &MBB, std::next(MBBI), MBB.end());
+    dneBB->transferSuccessorsAndUpdatePHIs(&MBB);
+
+    MBB.addSuccessor(dneBB);
+
+    // Everything after MBBI was moved away, so this is MBB.end().
+    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
+
+    // bvc - branch if overflow flag not set
+    // NOTE(review): the comment says "overflow not set" but the condition
+    // code emitted is ICC_VS - confirm which of the two is intended.
+    BuildMI(MBB, NextMBBI, DL, TII.get(SP::BCOND))
+        .addMBB(dneBB)
+        .addImm(SPCC::ICC_VS);
+
+    // bnz - branch if not zero
+    BuildMI(MBB, NextMBBI, DL, TII.get(SP::BCOND))
+        .addMBB(dneBB)
+        .addImm(SPCC::ICC_NE);
+
+    // use the WRPSR (Write Processor State Register) instruction to set the
+    // zero flag to 1
+    // create wr %g0, 1, %psr
+    BuildMI(MBB, NextMBBI, DL, TII.get(SP::WRPSRri))
+        .addReg(SP::G0)
+        .addImm(1);
+
+    BuildMI(MBB, NextMBBI, DL, TII.get(SP::NOP));
+  };
+
+  bool Modified = false;
+  for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
+    MachineBasicBlock &MBB = *MFI;
+    for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
+      MachineInstr &MI = *MBBI;
+      unsigned Opcode = MI.getOpcode();
+      if (Opcode == SP::SDIVCCrr || Opcode == SP::SDIVCCri ||
+          Opcode == SP::UDIVCCrr || Opcode == SP::UDIVCCri) {
+        SplitAndEmitFixup(MBB, MBBI);
+        Modified = true;
+      } else if (MI.isInlineAsm()) {
+        StringRef AsmString =
+            MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName();
+        if (AsmString.startswith_lower("sdivcc") ||
+            AsmString.startswith_lower("udivcc")) {
+          // this is an inline SDIVCC or UDIVCC instruction
+          SplitAndEmitFixup(MBB, MBBI);
+          Modified = true;
+        }
+      }
+    }
+  }
+
+  return Modified;
+}
+
+//*****************************************************************************
+//**** InsertNOPDoublePrecision pass
+//*****************************************************************************
+// This erratum fix for some earlier LEON processors fixes a problem where a
+// double precision load will not yield the correct result if used in FMUL,
+// FDIV, FADD, FSUB or FSQRT instructions later. If this sequence is detected,
+// inserting a NOP between the two instructions will fix the erratum.
+// 1.scans the code after register allocation;
+// 2.checks for the problem conditions as described in the AT697E erratum
+// “Odd-Numbered FPU Register Dependency not Properly Checked in some
+// Double-Precision FPU Operations”;
+// 3.inserts NOPs if the problem exists.
+//
+char InsertNOPDoublePrecision::ID = 0;
+
+InsertNOPDoublePrecision::InsertNOPDoublePrecision(TargetMachine &tm)
+    : LEONMachineFunctionPass(tm, ID) {}
+
+// For every LDDFri/LDDFrr that is immediately followed (in the same block) by
+// FADDD/FSUBD/FMULD/FDIVD/FSQRTD, compares the register indices of the two
+// instructions and inserts one NOP between them when one of the erratum
+// patterns below matches. Returns true when at least one NOP was inserted.
+bool InsertNOPDoublePrecision::runOnMachineFunction(MachineFunction &MF) {
+  Subtarget = &MF.getSubtarget<SparcSubtarget>();
+  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
+  DebugLoc DL = DebugLoc();
+
+  bool Modified = false;
+  for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
+    MachineBasicBlock &MBB = *MFI;
+    for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
+      MachineInstr &MI = *MBBI;
+      unsigned Opcode = MI.getOpcode();
+      if (Opcode == SP::LDDFri || Opcode == SP::LDDFrr) {
+        MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+        // The load may be the last instruction of the block; there is then
+        // no following instruction to inspect (and nothing to dereference).
+        if (NMBBI == E)
+          continue;
+        MachineInstr &NMI = *NMBBI;
+
+        unsigned NextOpcode = NMI.getOpcode();
+        if (NextOpcode == SP::FADDD || NextOpcode == SP::FSUBD ||
+            NextOpcode == SP::FMULD || NextOpcode == SP::FDIVD) {
+          int RegAIndex = GetRegIndexForOperand(MI, 0);
+          int RegBIndex = GetRegIndexForOperand(NMI, 0);
+          int RegCIndex =
+              GetRegIndexForOperand(NMI, 2); // Second source operand is index 2
+          int RegDIndex =
+              GetRegIndexForOperand(NMI, 1); // Destination operand is index 1
+
+          // Tracks whether a NOP was already placed between THIS pair, so
+          // the second pattern check cannot insert a duplicate. (The
+          // function-wide Modified flag must not be used for this: once set
+          // it would disable the second check for the rest of the function.)
+          bool InsertedNOP = false;
+
+          if ((RegAIndex == RegBIndex + 1 && RegBIndex == RegDIndex) ||
+              (RegAIndex == RegCIndex + 1 && RegCIndex == RegDIndex) ||
+              (RegAIndex == RegBIndex + 1 && RegCIndex == RegDIndex) ||
+              (RegAIndex == RegCIndex + 1 && RegBIndex == RegDIndex)) {
+            // Insert NOP between the two instructions.
+            BuildMI(MBB, NMBBI, DL, TII.get(SP::NOP));
+            InsertedNOP = true;
+            Modified = true;
+          }
+
+          // Check the errata patterns that only happen for FADDD and FMULD
+          if (!InsertedNOP &&
+              (NextOpcode == SP::FADDD || NextOpcode == SP::FMULD)) {
+            RegAIndex = GetRegIndexForOperand(MI, 1);
+            if (RegAIndex == RegBIndex + 1 && RegBIndex == RegCIndex &&
+                RegBIndex == RegDIndex) {
+              // Insert NOP between the two instructions.
+              BuildMI(MBB, NMBBI, DL, TII.get(SP::NOP));
+              Modified = true;
+            }
+          }
+        } else if (NextOpcode == SP::FSQRTD) {
+          int RegAIndex = GetRegIndexForOperand(MI, 1);
+          int RegBIndex = GetRegIndexForOperand(NMI, 0);
+          int RegCIndex = GetRegIndexForOperand(NMI, 1);
+
+          if (RegAIndex == RegBIndex + 1 && RegBIndex == RegCIndex) {
+            // Insert NOP between the two instructions.
+            BuildMI(MBB, NMBBI, DL, TII.get(SP::NOP));
+            Modified = true;
+          }
+        }
+      }
+    }
+  }
+
+  return Modified;
+}
+
+//*****************************************************************************
+//**** PreventRoundChange pass
+//*****************************************************************************
+// To prevent any explicit change of the default rounding mode, this pass
+// detects any call of the fesetround function and removes this call from the
+// list of generated operations.
+//
+char PreventRoundChange::ID = 0;
+
+PreventRoundChange::PreventRoundChange(TargetMachine &tm)
+    : LEONMachineFunctionPass(tm, ID) {}
+
+// Erases every SP::CALL whose first operand is a global named "fesetround"
+// (compared case-insensitively). Returns true when a call was removed.
+bool PreventRoundChange::runOnMachineFunction(MachineFunction &MF) {
+  Subtarget = &MF.getSubtarget<SparcSubtarget>();
+
+  bool Modified = false;
+  for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
+    MachineBasicBlock &MBB = *MFI;
+    for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E;) {
+      // Step past MI before it can be erased: the iterator then stays valid,
+      // the instruction following an erased call is still visited, and the
+      // loop can never advance beyond the end of the block.
+      MachineInstr &MI = *MBBI++;
+      unsigned Opcode = MI.getOpcode();
+      if (Opcode == SP::CALL && MI.getNumOperands() > 0) {
+        MachineOperand &MO = MI.getOperand(0);
+
+        if (MO.isGlobal()) {
+          StringRef FuncName = MO.getGlobal()->getName();
+          if (FuncName.compare_lower("fesetround") == 0) {
+            MI.eraseFromParent();
+            Modified = true;
+          }
+        }
+      }
+    }
+  }
+
+  return Modified;
+}
+//*****************************************************************************
+//**** FlushCacheLineSWAP pass
+//*****************************************************************************
+// This pass inserts FLUSHW just before any SWAP atomic 
instruction.
+//
+char FlushCacheLineSWAP::ID = 0;
+
+FlushCacheLineSWAP::FlushCacheLineSWAP(TargetMachine &tm)
+    : LEONMachineFunctionPass(tm, ID) {}
+
+// Emits SP::FLUSH followed by five NOPs in front of every SWAP/LDSTUB
+// (real opcode, or inline assembly whose string starts with "swap"/"ldstub",
+// case-insensitive). Returns true when at least one sequence was emitted.
+bool FlushCacheLineSWAP::runOnMachineFunction(MachineFunction &MF) {
+  Subtarget = &MF.getSubtarget<SparcSubtarget>();
+  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
+  DebugLoc DL = DebugLoc();
+
+  bool Modified = false;
+  for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
+    MachineBasicBlock &MBB = *MFI;
+    for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
+      MachineInstr &MI = *MBBI;
+      unsigned Opcode = MI.getOpcode();
+
+      bool IsAtomic = Opcode == SP::SWAPrr || Opcode == SP::SWAPri ||
+                      Opcode == SP::LDSTUBrr || Opcode == SP::LDSTUBri;
+      if (!IsAtomic && MI.isInlineAsm()) {
+        // this may be an inline swap or ldstub instruction
+        StringRef AsmString =
+            MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName();
+        IsAtomic = AsmString.startswith_lower("swap") ||
+                   AsmString.startswith_lower("ldstub");
+      }
+
+      if (IsAtomic) {
+        // insert flush and 5 NOPs before the swap/ldstub instruction
+        BuildMI(MBB, MBBI, DL, TII.get(SP::FLUSH));
+        for (unsigned InsertedCount = 0; InsertedCount < 5; InsertedCount++)
+          BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
+
+        Modified = true;
+      }
+    }
+  }
+
+  return Modified;
+}
+
+//*****************************************************************************
+//**** InsertNOPsLoadStore pass
+//*****************************************************************************
+// This pass shall insert NOPs between floating point loads and stores when the
+// following circumstances are present [5]:
+// Pattern 1:
+// 1. single-precision load or single-precision FPOP to register %fX, where X is
+// the same register as the store being checked;
+// 2. single-precision load or single-precision FPOP to register %fY , where Y
+// is the opposite register in the same double-precision pair;
+// 3. 0-3 instructions of any kind, except stores from %fX or %fY or operations
+// with %fX as destination;
+// 4. the store (from register %fX) being considered.
+// Pattern 2:
+// 1. double-precision FPOP;
+// 2. any number of operations on any kind, except no double-precision FPOP and
+// at most one (less than two) single-precision or single-to-double FPOPs;
+// 3. the store (from register %fX) being considered.
+//
+char InsertNOPsLoadStore::ID = 0;
+
+InsertNOPsLoadStore::InsertNOPsLoadStore(TargetMachine &tm)
+    : LEONMachineFunctionPass(tm, ID) {}
+
+// Scans MF linearly, tracking the most recent Pattern 1 and Pattern 2
+// candidate instructions, and inserts four NOPs in front of an instruction
+// that completes one of the patterns. Returns true when NOPs were inserted.
+bool InsertNOPsLoadStore::runOnMachineFunction(MachineFunction &MF) {
+  Subtarget = &MF.getSubtarget<SparcSubtarget>();
+  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
+  DebugLoc DL = DebugLoc();
+
+  MachineInstr *Pattern1FirstInstruction = nullptr;
+  MachineInstr *Pattern2FirstInstruction = nullptr;
+  // Number of upcoming instructions still to be checked against %fX/%fY.
+  unsigned int StoreInstructionsToCheck = 0;
+  // Only read while StoreInstructionsToCheck > 0, i.e. after being assigned
+  // below; initialised so they never hold an indeterminate value.
+  int FxRegIndex = 0, FyRegIndex = 0;
+
+  bool Modified = false;
+  for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
+    MachineBasicBlock &MBB = *MFI;
+    for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
+      MachineInstr &MI = *MBBI;
+
+      if (StoreInstructionsToCheck > 0) {
+        if (((MI.getOpcode() == SP::STFrr || MI.getOpcode() == SP::STFri) &&
+             (GetRegIndexForOperand(MI, LAST_OPERAND) == FxRegIndex ||
+              GetRegIndexForOperand(MI, LAST_OPERAND) == FyRegIndex)) ||
+            GetRegIndexForOperand(MI, 0) == FxRegIndex) {
+          // Insert four NOPs
+          for (unsigned InsertedCount = 0; InsertedCount < 4; InsertedCount++) {
+            BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
+          }
+          Modified = true;
+        }
+        StoreInstructionsToCheck--;
+      }
+
+      switch (MI.getOpcode()) {
+      // Watch for Pattern 1 FPop instructions
+      case SP::LDrr:
+      case SP::LDri:
+      case SP::LDFrr:
+      case SP::LDFri:
+      case SP::FADDS:
+      case SP::FSUBS:
+      case SP::FMULS:
+      case SP::FDIVS:
+      case SP::FSQRTS:
+      case SP::FCMPS:
+      case SP::FMOVS:
+      case SP::FNEGS:
+      case SP::FABSS:
+      case SP::FITOS:
+      case SP::FSTOI:
+      case SP::FITOD:
+      case SP::FDTOI:
+      case SP::FDTOS:
+        if (Pattern1FirstInstruction != nullptr) {
+          FxRegIndex = GetRegIndexForOperand(*Pattern1FirstInstruction, 0);
+          FyRegIndex = GetRegIndexForOperand(MI, 0);
+
+          // Check to see if these registers are part of the same double
+          // precision register pair.
+          int DoublePrecRegIndexForX = (FxRegIndex - SP::F0) / 2;
+          int DoublePrecRegIndexForY = (FyRegIndex - SP::F0) / 2;
+
+          if (DoublePrecRegIndexForX == DoublePrecRegIndexForY)
+            StoreInstructionsToCheck = 4;
+        }
+
+        Pattern1FirstInstruction = &MI;
+        break;
+      // End of Pattern 1
+
+      // Search for Pattern 2
+      case SP::FADDD:
+      case SP::FSUBD:
+      case SP::FMULD:
+      case SP::FDIVD:
+      case SP::FSQRTD:
+      case SP::FCMPD:
+        Pattern2FirstInstruction = &MI;
+        Pattern1FirstInstruction = nullptr;
+        break;
+
+      case SP::STFrr:
+      case SP::STFri:
+      case SP::STDFrr:
+      case SP::STDFri:
+        if (Pattern2FirstInstruction != nullptr) {
+          if (GetRegIndexForOperand(MI, LAST_OPERAND) ==
+              GetRegIndexForOperand(*Pattern2FirstInstruction, 0)) {
+            // Insert four NOPs
+            for (unsigned InsertedCount = 0; InsertedCount < 4;
+                 InsertedCount++) {
+              BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
+            }
+            // The function was changed here too; report it to the caller.
+            Modified = true;
+
+            Pattern2FirstInstruction = nullptr;
+          }
+        }
+        Pattern1FirstInstruction = nullptr;
+        break;
+      // End of Pattern 2
+
+      default:
+        // Ensure we don't count debug-only values while we're testing for the
+        // patterns.
+        if (!MI.isDebugValue())
+          Pattern1FirstInstruction = nullptr;
+        break;
+      }
+    }
+  }
+
+  return Modified;
+}
Index: llvm/trunk/lib/Target/Sparc/Sparc.td =================================================================== --- llvm/trunk/lib/Target/Sparc/Sparc.td +++ llvm/trunk/lib/Target/Sparc/Sparc.td @@ -21,35 +21,34 @@ // def FeatureV9 - : SubtargetFeature<"v9", "IsV9", "true", - "Enable SPARC-V9 instructions">; + : SubtargetFeature<"v9", "IsV9", "true", "Enable SPARC-V9 instructions">; def FeatureV8Deprecated - : SubtargetFeature<"deprecated-v8", "V8DeprecatedInsts", "true", - "Enable deprecated V8 instructions in V9 mode">; + : SubtargetFeature<"deprecated-v8", "V8DeprecatedInsts", "true", + "Enable deprecated V8 instructions in V9 mode">; def FeatureVIS - : SubtargetFeature<"vis", "IsVIS", "true", - "Enable UltraSPARC Visual Instruction Set extensions">; + : SubtargetFeature<"vis", "IsVIS", "true", + "Enable UltraSPARC Visual Instruction Set extensions">; def FeatureVIS2 - : SubtargetFeature<"vis2", "IsVIS2", "true", - "Enable Visual Instruction Set extensions II">; + : SubtargetFeature<"vis2", "IsVIS2", "true", + "Enable Visual Instruction Set extensions II">; def FeatureVIS3 - : SubtargetFeature<"vis3", "IsVIS3", "true", - "Enable Visual Instruction Set extensions III">; + : SubtargetFeature<"vis3", "IsVIS3", "true", + "Enable Visual Instruction Set extensions III">; def FeatureLeon - : SubtargetFeature<"leon", "IsLeon", "true", - "Enable LEON extensions">; + : SubtargetFeature<"leon", "IsLeon", "true", "Enable LEON extensions">; def FeatureHardQuad - : SubtargetFeature<"hard-quad-float", "HasHardQuad", "true", - "Enable quad-word floating point instructions">; + : SubtargetFeature<"hard-quad-float", "HasHardQuad", "true", + "Enable quad-word floating point instructions">; def UsePopc : SubtargetFeature<"popc", "UsePopc", "true", "Use the popc (population count) instruction">; -def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true", - "Use 
software emulation for floating point">; +def FeatureSoftFloat + : SubtargetFeature<"soft-float", "UseSoftFloat", "true", + "Use software emulation for floating point">; -//==== Features added predmoninantly for LEON subtarget support +//==== Features added predominantly for LEON subtarget support include "LeonFeatures.td" //===----------------------------------------------------------------------===// @@ -63,87 +62,92 @@ def SparcInstrInfo : InstrInfo; -def SparcAsmParser : AsmParser { - bit ShouldEmitMatchRegisterName = 0; -} +def SparcAsmParser : AsmParser { bit ShouldEmitMatchRegisterName = 0; } //===----------------------------------------------------------------------===// // SPARC processors supported. //===----------------------------------------------------------------------===// class Proc<string Name, list<SubtargetFeature> Features> - : Processor<Name, NoItineraries, Features>; + : Processor<Name, NoItineraries, Features>; -def : Proc<"generic", []>; -def : Proc<"v7", []>; -def : Proc<"v8", []>; -def : Proc<"supersparc", []>; -def : Proc<"sparclite", []>; -def : Proc<"f934", []>; -def : Proc<"hypersparc", []>; -def : Proc<"sparclite86x", []>; -def : Proc<"sparclet", []>; -def : Proc<"tsc701", []>; -def : Proc<"myriad2", []>; -def : Proc<"myriad2.1", []>; -def : Proc<"myriad2.2", []>; -def : Proc<"v9", [FeatureV9]>; -def : Proc<"ultrasparc", [FeatureV9, FeatureV8Deprecated, FeatureVIS]>; -def : Proc<"ultrasparc3", [FeatureV9, FeatureV8Deprecated, FeatureVIS, - FeatureVIS2]>; -def : Proc<"niagara", [FeatureV9, FeatureV8Deprecated, FeatureVIS, - FeatureVIS2]>; -def : Proc<"niagara2", [FeatureV9, FeatureV8Deprecated, UsePopc, - FeatureVIS, FeatureVIS2]>; -def : Proc<"niagara3", [FeatureV9, FeatureV8Deprecated, UsePopc, - FeatureVIS, FeatureVIS2]>; -def : Proc<"niagara4", [FeatureV9, FeatureV8Deprecated, UsePopc, - FeatureVIS, FeatureVIS2, FeatureVIS3]>; +def : Proc<"generic", []>; +def : Proc<"v7", []>; +def : Proc<"v8", []>; +def : Proc<"supersparc", []>; +def : Proc<"sparclite", []>; +def : Proc<"f934", []>; +def : Proc<"hypersparc", []>; +def : 
Proc<"sparclite86x", []>; +def : Proc<"sparclet", []>; +def : Proc<"tsc701", []>; +def : Proc<"myriad2", []>; +def : Proc<"myriad2.1", []>; +def : Proc<"myriad2.2", []>; +def : Proc<"v9", [ FeatureV9 ]>; +def : Proc<"ultrasparc", [ FeatureV9, FeatureV8Deprecated, FeatureVIS ]>; +def : Proc<"ultrasparc3", + [ FeatureV9, FeatureV8Deprecated, FeatureVIS, FeatureVIS2 ]>; +def : Proc<"niagara", + [ FeatureV9, FeatureV8Deprecated, FeatureVIS, FeatureVIS2 ]>; +def : Proc<"niagara2", [ + FeatureV9, FeatureV8Deprecated, UsePopc, FeatureVIS, FeatureVIS2 +]>; +def : Proc<"niagara3", [ + FeatureV9, FeatureV8Deprecated, UsePopc, FeatureVIS, FeatureVIS2 +]>; +def : Proc<"niagara4", [ + FeatureV9, FeatureV8Deprecated, UsePopc, FeatureVIS, FeatureVIS2, FeatureVIS3 +]>; // LEON 2 FT generic -def : Processor<"leon2", LEON2Itineraries, - [FeatureLeon]>; +def : Processor<"leon2", LEON2Itineraries, [ FeatureLeon ]>; // LEON 2 FT (AT697E) -// TO DO: Place-holder: Processor specific features will be added *very* soon here. -def : Processor<"at697e", LEON2Itineraries, - [FeatureLeon, InsertNOPLoad]>; +// AT697E: Provides full coverage of AT697E - covers all the erratum fixes for +// LEON2 AT697E +def : Processor<"at697e", LEON2Itineraries, [ + FeatureLeon, ReplaceSDIV, FixCALL, IgnoreZeroFlag, InsertNOPDoublePrecision +]>; // LEON 2 FT (AT697F) -// TO DO: Place-holder: Processor specific features will be added *very* soon here. +// AT697F: Provides full coverage of AT697F - covers all the erratum fixes for +// LEON2 AT697F def : Processor<"at697f", LEON2Itineraries, - [FeatureLeon, InsertNOPLoad]>; - + [ FeatureLeon, InsertNOPDoublePrecision ]>; // LEON 3 FT generic -def : Processor<"leon3", LEON3Itineraries, - [FeatureLeon, UMACSMACSupport]>; +def : Processor<"leon3", LEON3Itineraries, [ FeatureLeon, UMACSMACSupport ]>; // LEON 3 FT (UT699). Provides features for the UT699 processor -// - covers all the erratum fixes for LEON3, but does not support the CASA instruction. 
-def : Processor<"ut699", LEON3Itineraries, - [FeatureLeon, InsertNOPLoad, FixFSMULD, ReplaceFMULS, FixAllFDIVSQRT]>; +// - covers all the erratum fixes for LEON3, but does not support the CASA +// instruction. +def : Processor<"ut699", LEON3Itineraries, [ + FeatureLeon, FixFSMULD, ReplaceFMULS, PreventRoundChange, + FixAllFDIVSQRT, InsertNOPLoad, FlushCacheLineSWAP, InsertNOPsLoadStore +]>; // LEON3 FT (GR712RC). Provides features for the GR712RC processor. -// - covers all the erratum fixed for LEON3 and support for the CASA instruction. +// - covers all the erratum fixed for LEON3 and support for the CASA +// instruction. def : Processor<"gr712rc", LEON3Itineraries, - [FeatureLeon, LeonCASA]>; + [ FeatureLeon, LeonCASA ]>; // LEON 4 FT generic def : Processor<"leon4", LEON4Itineraries, - [FeatureLeon, UMACSMACSupport, LeonCASA]>; + [ FeatureLeon, LeonCASA ]>; -// LEON 4 FT (GR740) -// TO DO: Place-holder: Processor specific features will be added *very* soon here. -def : Processor<"gr740", LEON4Itineraries, - [FeatureLeon, UMACSMACSupport, LeonCASA]>; +// GR740: Provides full coverage of GR740 - covers all the erratum fixes for +// LEON3 + support to CASA + LEON 4 instruction timings +def : Processor<"gr740", LEON4Itineraries, + [ FeatureLeon, LeonCASA ]> {} //===----------------------------------------------------------------------===// // Declare the target which we are implementing //===----------------------------------------------------------------------===// def SparcAsmWriter : AsmWriter { - string AsmWriterClassName = "InstPrinter"; + string AsmWriterClassName = "InstPrinter"; int PassSubtarget = 1; int Variant = 0; } @@ -151,6 +155,6 @@ def Sparc : Target { // Pull in Instruction Info: let InstructionSet = SparcInstrInfo; - let AssemblyParsers = [SparcAsmParser]; - let AssemblyWriters = [SparcAsmWriter]; + let AssemblyParsers = [ SparcAsmParser ]; + let AssemblyWriters = [ SparcAsmWriter ]; } Index: llvm/trunk/lib/Target/Sparc/SparcISelLowering.cpp 
=================================================================== --- llvm/trunk/lib/Target/Sparc/SparcISelLowering.cpp +++ llvm/trunk/lib/Target/Sparc/SparcISelLowering.cpp @@ -32,7 +32,6 @@ #include "llvm/Support/ErrorHandling.h" using namespace llvm; - //===----------------------------------------------------------------------===// // Calling Convention Implementation //===----------------------------------------------------------------------===// @@ -1636,9 +1635,7 @@ // Atomics are supported on SparcV9. 32-bit atomics are also // supported by some Leon SparcV8 variants. Otherwise, atomics // are unsupported. - if (Subtarget->isV9()) - setMaxAtomicSizeInBitsSupported(64); - else if (Subtarget->hasLeonCasa()) + if (Subtarget->isV9() || Subtarget->hasLeonCasa()) setMaxAtomicSizeInBitsSupported(64); else setMaxAtomicSizeInBitsSupported(0); @@ -2663,7 +2660,6 @@ uint64_t depth = Op.getConstantOperandVal(0); return getFRAMEADDR(depth, Op, DAG, Subtarget); - } static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG, @@ -3094,7 +3090,7 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const { switch (MI.getOpcode()) { - default: llvm_unreachable("Unknown SELECT_CC!"); + default: llvm_unreachable("Unknown Custom Instruction!"); case SP::SELECT_CC_Int_ICC: case SP::SELECT_CC_FP_ICC: case SP::SELECT_CC_DFP_ICC: @@ -3111,7 +3107,6 @@ case SP::EH_SJLJ_LONGJMP32rr: case SP::EH_SJLJ_LONGJMP32ri: return emitEHSjLjLongJmp(MI, BB); - } } @@ -3382,8 +3377,11 @@ SparcTargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { - default: break; - case 'r': return C_RegisterClass; + default: + break; + case 'f': + case 'r': + return C_RegisterClass; case 'I': // SIMM13 return C_Other; } @@ -3457,6 +3455,9 @@ MVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { + case 'f': + return std::make_pair(0U, &SP::FPRegsRegClass); + case 'r': if (VT == MVT::v2i32) 
return std::make_pair(0U, &SP::IntPairRegClass); Index: llvm/trunk/lib/Target/Sparc/SparcSubtarget.h =================================================================== --- llvm/trunk/lib/Target/Sparc/SparcSubtarget.h +++ llvm/trunk/lib/Target/Sparc/SparcSubtarget.h @@ -48,6 +48,14 @@ bool FixFSMULD; bool ReplaceFMULS; bool FixAllFDIVSQRT; + bool UseSoftFpu; + bool PerformSDIVReplace; + bool FixCallImmediates; + bool IgnoreZeroFlag; + bool InsertNOPDoublePrecision; + bool PreventRoundChange; + bool FlushCacheLineSWAP; + bool InsertNOPsLoadStore; SparcInstrInfo InstrInfo; SparcTargetLowering TLInfo; @@ -85,12 +93,20 @@ bool useSoftFloat() const { return UseSoftFloat; } // Leon options - bool hasUmacSmac() const { return HasUmacSmac; } + bool useSoftFpu() const { return UseSoftFpu; } bool hasLeonCasa() const { return HasLeonCasa; } - bool insertNOPLoad() const { return InsertNOPLoad; } + bool hasUmacSmac() const { return HasUmacSmac; } + bool performSDIVReplace() const { return PerformSDIVReplace; } + bool fixCallImmediates() const { return FixCallImmediates; } + bool ignoreZeroFlag() const { return IgnoreZeroFlag; } + bool insertNOPDoublePrecision() const { return InsertNOPDoublePrecision; } bool fixFSMULD() const { return FixFSMULD; } bool replaceFMULS() const { return ReplaceFMULS; } + bool preventRoundChange() const { return PreventRoundChange; } bool fixAllFDIVSQRT() const { return FixAllFDIVSQRT; } + bool flushCacheLineSWAP() const { return FlushCacheLineSWAP; } + bool insertNOPsLoadStore() const { return InsertNOPsLoadStore; } + bool insertNOPLoad() const { return InsertNOPLoad; } /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. 
Index: llvm/trunk/lib/Target/Sparc/SparcSubtarget.cpp =================================================================== --- llvm/trunk/lib/Target/Sparc/SparcSubtarget.cpp +++ llvm/trunk/lib/Target/Sparc/SparcSubtarget.cpp @@ -39,10 +39,17 @@ // Leon features HasLeonCasa = false; HasUmacSmac = false; - InsertNOPLoad = false; + PerformSDIVReplace = false; + FixCallImmediates = false; + IgnoreZeroFlag = false; + InsertNOPDoublePrecision = false; FixFSMULD = false; ReplaceFMULS = false; + PreventRoundChange = false; FixAllFDIVSQRT = false; + InsertNOPLoad = false; + FlushCacheLineSWAP = false; + InsertNOPsLoadStore = false; // Determine default and user specified characteristics std::string CPUName = CPU; Index: llvm/trunk/lib/Target/Sparc/SparcTargetMachine.cpp =================================================================== --- llvm/trunk/lib/Target/Sparc/SparcTargetMachine.cpp +++ llvm/trunk/lib/Target/Sparc/SparcTargetMachine.cpp @@ -76,7 +76,7 @@ SparcTargetMachine::~SparcTargetMachine() {} -const SparcSubtarget * +const SparcSubtarget * SparcTargetMachine::getSubtargetImpl(const Function &F) const { Attribute CPUAttr = F.getFnAttribute("target-cpu"); Attribute FSAttr = F.getFnAttribute("target-features"); @@ -95,7 +95,7 @@ F.hasFnAttribute("use-soft-float") && F.getFnAttribute("use-soft-float").getValueAsString() == "true"; - if (softFloat) + if (softFloat) FS += FS.empty() ? 
"+soft-float" : ",+soft-float"; auto &I = SubtargetMap[CPU + FS]; @@ -115,7 +115,7 @@ class SparcPassConfig : public TargetPassConfig { public: SparcPassConfig(SparcTargetMachine *TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) {} + : TargetPassConfig(TM, PM) {} SparcTargetMachine &getSparcTargetMachine() const { return getTM(); @@ -142,28 +142,46 @@ return false; } -void SparcPassConfig::addPreEmitPass(){ +void SparcPassConfig::addPreEmitPass() { addPass(createSparcDelaySlotFillerPass(getSparcTargetMachine())); - - if (this->getSparcTargetMachine().getSubtargetImpl()->insertNOPLoad()) - { - addPass(new InsertNOPLoad(getSparcTargetMachine())); + if (this->getSparcTargetMachine().getSubtargetImpl()->ignoreZeroFlag()) { + addPass(new IgnoreZeroFlag(getSparcTargetMachine())); + } + if (this->getSparcTargetMachine().getSubtargetImpl()->performSDIVReplace()) { + addPass(new ReplaceSDIV(getSparcTargetMachine())); } - if (this->getSparcTargetMachine().getSubtargetImpl()->fixFSMULD()) - { + if (this->getSparcTargetMachine().getSubtargetImpl()->fixCallImmediates()) { + addPass(new FixCALL(getSparcTargetMachine())); + } + if (this->getSparcTargetMachine().getSubtargetImpl()->fixFSMULD()) { addPass(new FixFSMULD(getSparcTargetMachine())); } - if (this->getSparcTargetMachine().getSubtargetImpl()->replaceFMULS()) - { + if (this->getSparcTargetMachine().getSubtargetImpl()->replaceFMULS()) { addPass(new ReplaceFMULS(getSparcTargetMachine())); } - if (this->getSparcTargetMachine().getSubtargetImpl()->fixAllFDIVSQRT()) - { + if (this->getSparcTargetMachine().getSubtargetImpl()->preventRoundChange()) { + addPass(new PreventRoundChange(getSparcTargetMachine())); + } + if (this->getSparcTargetMachine().getSubtargetImpl()->fixAllFDIVSQRT()) { addPass(new FixAllFDIVSQRT(getSparcTargetMachine())); } + if (this->getSparcTargetMachine().getSubtargetImpl()->insertNOPsLoadStore()) { + addPass(new InsertNOPsLoadStore(getSparcTargetMachine())); + } + if 
(this->getSparcTargetMachine().getSubtargetImpl()->insertNOPLoad()) { + addPass(new InsertNOPLoad(getSparcTargetMachine())); + } + if (this->getSparcTargetMachine().getSubtargetImpl()->flushCacheLineSWAP()) { + addPass(new FlushCacheLineSWAP(getSparcTargetMachine())); + } + if (this->getSparcTargetMachine() + .getSubtargetImpl() + ->insertNOPDoublePrecision()) { + addPass(new InsertNOPDoublePrecision(getSparcTargetMachine())); + } } -void SparcV8TargetMachine::anchor() { } +void SparcV8TargetMachine::anchor() {} SparcV8TargetMachine::SparcV8TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, @@ -173,7 +191,7 @@ CodeGenOpt::Level OL) : SparcTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} -void SparcV9TargetMachine::anchor() { } +void SparcV9TargetMachine::anchor() {} SparcV9TargetMachine::SparcV9TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, Index: llvm/trunk/test/CodeGen/SPARC/LeonFixCALLPassUT.ll =================================================================== --- llvm/trunk/test/CodeGen/SPARC/LeonFixCALLPassUT.ll +++ llvm/trunk/test/CodeGen/SPARC/LeonFixCALLPassUT.ll @@ -0,0 +1,20 @@ +; RUN: llc %s -O0 -march=sparc -mcpu=at697e -o - | FileCheck %s -check-prefix=FIXCALL +; RUN: llc %s -O0 -march=sparc -mcpu=leon2 -mattr=+fixcall -o - | FileCheck %s -check-prefix=FIXCALL + +; RUN: llc %s -O0 -march=sparc -mcpu=at697e -mattr=-fixcall -o - | FileCheck %s -check-prefix=NO_FIXCALL +; RUN: llc %s -O0 -march=sparc -mcpu=leon2 -o - | FileCheck %s -check-prefix=NO_FIXCALL + + +; FIXCALL-LABEL: immediate_call_test +; FIXCALL: call 763288 + +; NO_FIXCALL-LABEL: immediate_call_test +; NO_FIXCALL: call 2047583640 +define void @immediate_call_test() nounwind { +entry: + call void asm sideeffect "call $0", "i"(i32 2047583640) nounwind + ret void +} + + + Index: llvm/trunk/test/CodeGen/SPARC/LeonInsertNOPLoad.ll =================================================================== --- 
llvm/trunk/test/CodeGen/SPARC/LeonInsertNOPLoad.ll +++ llvm/trunk/test/CodeGen/SPARC/LeonInsertNOPLoad.ll @@ -0,0 +1,13 @@ +; RUN: llc %s -O0 -march=sparc -mcpu=ut699 -o - | FileCheck %s + +; CHECK: ld [%o0+%lo(.LCPI0_0)], %f0 +; CHECK-NEXT: nop + + +define float @X() #0 { +entry: + %f = alloca float, align 4 + store float 0x3FF3C08320000000, float* %f, align 4 + %0 = load float, float* %f, align 4 + ret float %0 +} Index: llvm/trunk/test/CodeGen/SPARC/LeonInsertNOPsDoublePrecision.ll =================================================================== --- llvm/trunk/test/CodeGen/SPARC/LeonInsertNOPsDoublePrecision.ll +++ llvm/trunk/test/CodeGen/SPARC/LeonInsertNOPsDoublePrecision.ll @@ -0,0 +1,17 @@ +; RUN: llc %s -O0 -march=sparc -mcpu=at697f -o - | FileCheck %s + +; CHECK: ldd +; CHECK: ldd +; CHECK-NEXT: nop + +define double @mult() #0 { +entry: + %x = alloca double, align 8 + %y = alloca double, align 8 + store double 3.141590e+00, double* %x, align 8 + store double 1.234560e+00, double* %y, align 8 + %0 = load double, double* %x, align 8 + %1 = load double, double* %y, align 8 + %mul = fmul double %0, %1 + ret double %mul +} Index: llvm/trunk/test/CodeGen/SPARC/LeonPreventRoundChangePassUT.ll =================================================================== --- llvm/trunk/test/CodeGen/SPARC/LeonPreventRoundChangePassUT.ll +++ llvm/trunk/test/CodeGen/SPARC/LeonPreventRoundChangePassUT.ll @@ -0,0 +1,65 @@ +; RUN: llc %s -O0 -march=sparc -mcpu=ut699 -o - | FileCheck %s -check-prefix=NO_ROUND_FUNC +; RUN: llc %s -O0 -march=sparc -mcpu=leon3 -mattr=+prvntroundchange -o - | FileCheck %s -check-prefix=NO_ROUND_FUNC + +; RUN: llc %s -O0 -march=sparc -mcpu=ut699 -mattr=-prvntroundchange -o - | FileCheck %s -check-prefix=ROUND_FUNC +; RUN: llc %s -O0 -march=sparc -mcpu=leon3 -o - | FileCheck %s -check-prefix=ROUND_FUNC + + +; NO_ROUND_FUNC-LABEL: test_round_change +; NO_ROUND_FUNC-NOT: fesetround + +; ROUND_FUNC-LABEL: test_round_change +; ROUND_FUNC: fesetround + +; 
ModuleID = '' +target datalayout = "E-m:e-p:32:32-i64:64-f128:64-n32-S64" +target triple = "sparc-unknown--eabi" + +@.str = private unnamed_addr constant [17 x i8] c"-((-a)*b) != a*b\00", align 1 +@.str.1 = private unnamed_addr constant [7 x i8] c"test.c\00", align 1 +@__PRETTY_FUNCTION__.mult = private unnamed_addr constant [12 x i8] c"void mult()\00", align 1 + +; Function Attrs: nounwind +define void @test_round_change() #0 { +entry: + %a = alloca double, align 8 + %b = alloca double, align 8 + %x = alloca float, align 4 + store double 1.100000e+00, double* %a, align 8 + store double 1.010000e+01, double* %b, align 8 + store float 0x400921FA00000000, float* %x, align 4 + %call = call i32 @fesetround(i32 2048) #2 + %0 = load double, double* %a, align 8 + %sub = fsub double -0.000000e+00, %0 + %1 = load double, double* %b, align 8 + %mul = fmul double %sub, %1 + %sub1 = fsub double -0.000000e+00, %mul + %2 = load double, double* %a, align 8 + %3 = load double, double* %b, align 8 + %mul2 = fmul double %2, %3 + %cmp = fcmp une double %sub1, %mul2 + br i1 %cmp, label %cond.true, label %cond.false + +cond.true: ; preds = %entry + br label %cond.end + +cond.false: ; preds = %entry + call void @__assert_fail(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.1, i32 0, i32 0), i32 10, i8* getelementptr inbounds ([12 x i8], [12 x i8]* @__PRETTY_FUNCTION__.mult, i32 0, i32 0)) #3 + unreachable + ; No predecessors! 
+ br label %cond.end + +cond.end: ; preds = %4, %cond.true + ret void +} + +; Function Attrs: nounwind +declare i32 @fesetround(i32) #0 + +; Function Attrs: noreturn nounwind +declare void @__assert_fail(i8*, i8*, i32, i8*) #1 + +attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { noreturn nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind } +attributes #3 = { noreturn nounwind } \ No newline at end of file Index: llvm/trunk/test/CodeGen/SPARC/LeonReplaceSDIVPassUT.ll =================================================================== --- llvm/trunk/test/CodeGen/SPARC/LeonReplaceSDIVPassUT.ll +++ llvm/trunk/test/CodeGen/SPARC/LeonReplaceSDIVPassUT.ll @@ -0,0 +1,9 @@ +; RUN: llc %s -O0 -march=sparc -mcpu=at697e -o - | FileCheck %s + +; CHECK: sdivcc %o0, %o1, %o0 + +define i32 @lbr59(i32 %a, i32 %b) +{ + %r = sdiv i32 %a, %b + ret i32 %r +}