Index: lib/Target/X86/X86.td =================================================================== --- lib/Target/X86/X86.td +++ lib/Target/X86/X86.td @@ -166,6 +166,8 @@ "Call register indirect">; def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true", "LEA instruction needs inputs at AG stage">; +def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true", + "LEA instruction with certain arguments is slow">; //===----------------------------------------------------------------------===// // X86 processors supported. @@ -226,6 +228,7 @@ FeaturePCLMUL, FeatureAES, FeatureCallRegIndirect, FeaturePRFCHW, + FeatureSlowLEA, FeatureSlowBTMem, FeatureFastUAMem]>; // "Arrandale" along with corei3 and corei5 def : ProcessorModel<"corei7", SandyBridgeModel, Index: lib/Target/X86/X86FixupLEAs.cpp =================================================================== --- lib/Target/X86/X86FixupLEAs.cpp +++ lib/Target/X86/X86FixupLEAs.cpp @@ -57,6 +57,11 @@ void processInstruction(MachineBasicBlock::iterator& I, MachineFunction::iterator MFI); + /// \brief Given a LEA instruction which is unprofitable + /// on Silvermont try to replace it with an equivalent ADD instruction + void processInstructionForSLM(MachineBasicBlock::iterator& I, + MachineFunction::iterator MFI); + /// \brief Determine if an instruction references a machine register /// and, if so, whether it reads or writes the register. RegUsageState usesRegister(MachineOperand& p, @@ -86,7 +91,7 @@ private: MachineFunction *MF; const TargetMachine *TM; - const TargetInstrInfo *TII; // Machine instruction info. + const X86InstrInfo *TII; // Machine instruction info. }; char FixupLEAPass::ID = 0; @@ -98,7 +103,7 @@ MachineInstr* MI = MBBI; MachineInstr* NewMI; switch (MI->getOpcode()) { - case X86::MOV32rr: + case X86::MOV32rr: case X86::MOV64rr: { const MachineOperand& Src = MI->getOperand(1); const MachineOperand& Dest = MI->getOperand(0); @@ -146,7 +151,7 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) { MF = &Func; TM = &MF->getTarget(); - TII = TM->getInstrInfo(); + TII = static_cast(TM->getInstrInfo()); DEBUG(dbgs() << "Start X86FixupLEAs\n";); // Process all basic blocks. @@ -243,9 +248,9 @@ MachineInstr* NewMI = postRAConvertToLEA(MFI, MBI); if (NewMI) { ++NumLEAs; - DEBUG(dbgs() << "Candidate to replace:"; MBI->dump();); + DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump();); // now to replace with an equivalent LEA... - DEBUG(dbgs() << "Replaced by: "; NewMI->dump();); + DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump();); MFI->erase(MBI); MachineBasicBlock::iterator J = static_cast (NewMI); @@ -254,10 +259,80 @@ } } +void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I, + MachineFunction::iterator MFI) { + MachineInstr *MI = I; + const int opcode = MI->getOpcode(); + if (opcode != X86::LEA16r && opcode != X86::LEA32r && opcode != X86::LEA64r && + opcode != X86::LEA64_32r) + return; + if (MI->getOperand(5).getReg() != 0 || !MI->getOperand(4).isImm() || + !TII->isSafeToClobberEFLAGS(*MFI, I)) + return; + const unsigned DstR = MI->getOperand(0).getReg(); + const unsigned SrcR1 = MI->getOperand(1).getReg(); + const unsigned SrcR2 = MI->getOperand(3).getReg(); + if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR)) + return; + if (MI->getOperand(2).getImm() > 1) + return; + int addrr_opcode, addri_opcode; + switch (opcode) { + case X86::LEA16r: + addrr_opcode = X86::ADD16rr; + addri_opcode = X86::ADD16ri; + break; + case X86::LEA32r: + addrr_opcode = X86::ADD32rr; + addri_opcode = X86::ADD32ri; + break; + case X86::LEA64_32r: + case X86::LEA64r: + addrr_opcode = X86::ADD64rr; + addri_opcode = X86::ADD64ri32; + break; + default: + assert(false && "Unexpected LEA instruction"); + } + DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump();); + DEBUG(dbgs() << "FixLEA: Replaced by: ";); + MachineInstr *NewMI = 0; + const MachineOperand &Dst = MI->getOperand(0); + // Make ADD instruction for two registers writing to LEA's destination + if (SrcR1 != 0 && SrcR2 != 0) { + const MachineOperand &Src1 = MI->getOperand(SrcR1 == DstR ? 1 : 3); + const MachineOperand &Src2 = MI->getOperand(SrcR1 == DstR ? 3 : 1); + NewMI = BuildMI(*MF, MI->getDebugLoc(), TII->get(addrr_opcode)) + .addOperand(Dst) + .addOperand(Src1) + .addOperand(Src2); + MFI->insert(I, NewMI); + DEBUG(NewMI->dump();); + } + // Make ADD instruction for immediate + if (MI->getOperand(4).getImm() != 0) { + const MachineOperand &SrcR = MI->getOperand(SrcR1 == DstR ? 1 : 3); + NewMI = BuildMI(*MF, MI->getDebugLoc(), TII->get(addri_opcode)) + .addOperand(Dst) + .addOperand(SrcR) + .addImm(MI->getOperand(4).getImm()); + MFI->insert(I, NewMI); + DEBUG(NewMI->dump();); + } + if (NewMI) { + MFI->erase(I); + I = static_cast(NewMI); + } +} + bool FixupLEAPass::processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI) { - for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) - processInstruction(I, MFI); + for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) { + if (TM->getSubtarget().isSLM()) + processInstructionForSLM(I, MFI); + else + processInstruction(I, MFI); + } return false; } Index: lib/Target/X86/X86InstrInfo.h =================================================================== --- lib/Target/X86/X86InstrInfo.h +++ lib/Target/X86/X86InstrInfo.h @@ -359,6 +359,9 @@ /// instruction that defines the specified register class. bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override; + bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + static bool isX86_64ExtendedReg(const MachineOperand &MO) { if (!MO.isReg()) return false; return X86II::isX86_64ExtendedReg(MO.getReg()); Index: lib/Target/X86/X86InstrInfo.cpp =================================================================== --- lib/Target/X86/X86InstrInfo.cpp +++ lib/Target/X86/X86InstrInfo.cpp @@ -1732,8 +1732,8 @@ /// would clobber the EFLAGS condition register. Note the result may be /// conservative. If it cannot definitely determine the safety after visiting /// a few instructions in each direction it assumes it's not safe. -static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) { +bool X86InstrInfo::isSafeToClobberEFLAGS(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { MachineBasicBlock::iterator E = MBB.end(); // For compile time consideration, if we are not able to determine the Index: lib/Target/X86/X86Subtarget.h =================================================================== --- lib/Target/X86/X86Subtarget.h +++ lib/Target/X86/X86Subtarget.h @@ -178,6 +178,9 @@ /// address generation (AG) time. bool LEAUsesAG; + /// SlowLEA - True if the LEA instruction with certain arguments is slow + bool SlowLEA; + /// Processor has AVX-512 PreFetch Instructions bool HasPFI; @@ -315,11 +318,13 @@ bool padShortFunctions() const { return PadShortFunctions; } bool callRegIndirect() const { return CallRegIndirect; } bool LEAusesAG() const { return LEAUsesAG; } + bool slowLEA() const { return SlowLEA; } bool hasCDI() const { return HasCDI; } bool hasPFI() const { return HasPFI; } bool hasERI() const { return HasERI; } bool isAtom() const { return X86ProcFamily == IntelAtom; } + bool isSLM() const { return X86ProcFamily == IntelSLM; } const Triple &getTargetTriple() const { return TargetTriple; } Index: lib/Target/X86/X86Subtarget.cpp =================================================================== --- lib/Target/X86/X86Subtarget.cpp +++ lib/Target/X86/X86Subtarget.cpp @@ -282,6 +282,7 @@ PadShortFunctions = false; CallRegIndirect = false; LEAUsesAG = false; + SlowLEA = false; stackAlignment = 4; // FIXME: this is a known good value for Yonah. How about others? MaxInlineSizeThreshold = 128; Index: lib/Target/X86/X86TargetMachine.cpp =================================================================== --- lib/Target/X86/X86TargetMachine.cpp +++ lib/Target/X86/X86TargetMachine.cpp @@ -226,7 +226,8 @@ ShouldPrint = true; } if (getOptLevel() != CodeGenOpt::None && - getX86Subtarget().LEAusesAG()){ + (getX86Subtarget().LEAusesAG() || + getX86Subtarget().slowLEA())){ addPass(createX86FixupLEAs()); ShouldPrint = true; }