Index: lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- lib/Target/X86/X86InstrInfo.cpp
+++ lib/Target/X86/X86InstrInfo.cpp
@@ -4083,6 +4083,20 @@
   return true;
 }
+
+static bool expandSHXDROT(MachineInstrBuilder &MIB, const MCInstrDesc &Desc) {
+  MIB->setDesc(Desc);
+  int64_t ShiftAmt = MIB->getOperand(2).getImm();
+  // Temporarily remove the immediate so we can add another source register.
+  MIB->RemoveOperand(2);
+  // Add the register. Don't copy the kill flag if there is one.
+  MIB.addReg(MIB->getOperand(1).getReg(),
+             getUndefRegState(MIB->getOperand(1).isUndef()));
+  // Add back the immediate.
+  MIB.addImm(ShiftAmt);
+  return true;
+}
+
 bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
   bool HasAVX = Subtarget.hasAVX();
   MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
@@ -4237,6 +4251,14 @@
   case X86::XOR64_FP:
   case X86::XOR32_FP:
     return expandXorFP(MIB, *this);
+  case X86::SHLDROT32ri:
+    return expandSHXDROT(MIB, get(X86::SHLD32rri8));
+  case X86::SHLDROT64ri:
+    return expandSHXDROT(MIB, get(X86::SHLD64rri8));
+  case X86::SHRDROT32ri:
+    return expandSHXDROT(MIB, get(X86::SHRD32rri8));
+  case X86::SHRDROT64ri:
+    return expandSHXDROT(MIB, get(X86::SHRD64rri8));
   }
   return false;
 }
Index: lib/Target/X86/X86InstrShiftRotate.td
===================================================================
--- lib/Target/X86/X86InstrShiftRotate.td
+++ lib/Target/X86/X86InstrShiftRotate.td
@@ -836,16 +836,24 @@
 // Sandy Bridge and newer Intel processors support faster rotates using
 // SHLD to avoid a partial flag update on the normal rotate instructions.
-let Predicates = [HasFastSHLDRotate], AddedComplexity = 5 in {
-  def : Pat<(rotl GR32:$src, (i8 imm:$shamt)),
-            (SHLD32rri8 GR32:$src, GR32:$src, imm:$shamt)>;
-  def : Pat<(rotl GR64:$src, (i8 imm:$shamt)),
-            (SHLD64rri8 GR64:$src, GR64:$src, imm:$shamt)>;
-
-  def : Pat<(rotr GR32:$src, (i8 imm:$shamt)),
-            (SHRD32rri8 GR32:$src, GR32:$src, imm:$shamt)>;
-  def : Pat<(rotr GR64:$src, (i8 imm:$shamt)),
-            (SHRD64rri8 GR64:$src, GR64:$src, imm:$shamt)>;
+// Use a pseudo so that TwoAddressInstructionPass and register allocation will
+// see this as a unary instruction.
+let Predicates = [HasFastSHLDRotate], AddedComplexity = 5,
+    Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteSHDrri],
+    Constraints = "$src1 = $dst" in {
+  def SHLDROT32ri : I<0, Pseudo, (outs GR32:$dst),
+                      (ins GR32:$src1, u8imm:$shamt), "",
+                      [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$shamt)))]>;
+  def SHLDROT64ri : I<0, Pseudo, (outs GR64:$dst),
+                      (ins GR64:$src1, u8imm:$shamt), "",
+                      [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$shamt)))]>;
+
+  def SHRDROT32ri : I<0, Pseudo, (outs GR32:$dst),
+                      (ins GR32:$src1, u8imm:$shamt), "",
+                      [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$shamt)))]>;
+  def SHRDROT64ri : I<0, Pseudo, (outs GR64:$dst),
+                      (ins GR64:$src1, u8imm:$shamt), "",
+                      [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$shamt)))]>;
 }
 
 def ROT32L2R_imm8 : SDNodeXForm
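
For context, a minimal lit-test sketch of the lowering this change is aiming for. It is not part of the patch; the test name, the "+fast-shld-rotate" attribute spelling, and the exact registers picked by allocation are assumptions, so the CHECK line uses wildcards:

; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+fast-shld-rotate < %s | FileCheck %s

; (x << 5) | (x >> 27) is matched as rotl. With the new pseudo it should be
; selected as SHLDROT32ri and expanded after register allocation into SHLD
; with the same register as both sources, rather than emitting ROL.
define i32 @rot32_by_5(i32 %x) {
; CHECK-LABEL: rot32_by_5:
; CHECK-NOT: roll
; CHECK: shldl $5, %{{[a-z0-9]+}}, %{{[a-z0-9]+}}
  %hi = shl i32 %x, 5
  %lo = lshr i32 %x, 27
  %rot = or i32 %hi, %lo
  ret i32 %rot
}

As the TableGen comment says, the point of deferring the expansion to expandPostRAPseudo is that the tied "$src1 = $dst" pseudo looks like an ordinary unary rotate to TwoAddressInstructionPass and the register allocator; the duplicated source operand is only materialized once allocation is done.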