diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimization.h b/llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimization.h --- a/llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimization.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimization.h @@ -17,6 +17,7 @@ namespace X86 { bool optimizeInstFromVEX3ToVEX2(MCInst &MI); bool optimizeShiftRotateWithImmediateOne(MCInst &MI); +bool optimizeRotateWithImmediate(MCInst &MI); } // namespace X86 } // namespace llvm #endif diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimization.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimization.cpp --- a/llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimization.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimization.cpp @@ -125,6 +125,7 @@ TO_IMM1(SHL16m) TO_IMM1(SHL32m) TO_IMM1(SHL64m) +#undef TO_IMM1 } MCOperand &LastOp = MI.getOperand(MI.getNumOperands() - 1); if (!LastOp.isImm() || LastOp.getImm() != 1) @@ -133,3 +134,45 @@ MI.erase(&LastOp); return true; } + +bool X86::optimizeRotateWithImmediate(MCInst &MI) { + unsigned NumOps = MI.getNumOperands(); + if (!NumOps) + return false; + MCOperand &LastOp = MI.getOperand(MI.getNumOperands() - 1); + if (!LastOp.isImm()) + return false; + int64_t Imm = LastOp.getImm(); + if (Imm == 1) + return false; + unsigned NewOpc; +#define FROM_TO(FROM, TO, N) \ + case X86::FROM: \ + if (Imm != N) \ + return false; \ + NewOpc = X86::TO; \ + break; + switch (MI.getOpcode()) { + default: + return false; + FROM_TO(ROL8ri, ROR8r1, 7) + FROM_TO(ROR8ri, ROL8r1, 7) + FROM_TO(ROL8mi, ROR8m1, 7) + FROM_TO(ROR8mi, ROL8m1, 7) + FROM_TO(ROL16ri, ROR16r1, 15) + FROM_TO(ROR16ri, ROL16r1, 15) + FROM_TO(ROL16mi, ROR16m1, 15) + FROM_TO(ROR16mi, ROL16m1, 15) + FROM_TO(ROL32ri, ROR32r1, 31) + FROM_TO(ROL32mi, ROR32m1, 31) + FROM_TO(ROR32ri, ROL32r1, 31) + FROM_TO(ROR32mi, ROL32m1, 31) + FROM_TO(ROL64ri, ROR64r1, 63) + FROM_TO(ROR64ri, ROL64r1, 63) + FROM_TO(ROL64mi, ROR64m1, 63) + FROM_TO(ROR64mi, ROL64m1, 63) + } + MI.setOpcode(NewOpc); + MI.erase(&LastOp); + return true; +} diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -37666,10 +37666,11 @@ fixShadowMBB->addSuccessor(fixShadowLoopPrepareMBB); // Do a single shift left. - unsigned ShlR1Opc = (PVT == MVT::i64) ? X86::SHL64r1 : X86::SHL32r1; + unsigned ShlR1Opc = (PVT == MVT::i64) ? X86::SHL64ri : X86::SHL32ri; Register SspAfterShlReg = MRI.createVirtualRegister(PtrRC); BuildMI(fixShadowLoopPrepareMBB, DL, TII->get(ShlR1Opc), SspAfterShlReg) - .addReg(SspSecondShrReg); + .addReg(SspSecondShrReg) + .addImm(1); // Save the value 128 to a register (will be used next with incssp). 
Register Value128InReg = MRI.createVirtualRegister(PtrRC); diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -4197,9 +4197,6 @@ case X86::SBB16rr: case X86::SBB8rr: case X86::SBB64rm: case X86::SBB32rm: case X86::SBB16rm: case X86::SBB8rm: case X86::NEG8r: case X86::NEG16r: case X86::NEG32r: case X86::NEG64r: - case X86::SAR8r1: case X86::SAR16r1: case X86::SAR32r1:case X86::SAR64r1: - case X86::SHR8r1: case X86::SHR16r1: case X86::SHR32r1:case X86::SHR64r1: - case X86::SHL8r1: case X86::SHL16r1: case X86::SHL32r1:case X86::SHL64r1: case X86::LZCNT16rr: case X86::LZCNT16rm: case X86::LZCNT32rr: case X86::LZCNT32rm: case X86::LZCNT64rr: case X86::LZCNT64rm: diff --git a/llvm/lib/Target/X86/X86InstrShiftRotate.td b/llvm/lib/Target/X86/X86InstrShiftRotate.td --- a/llvm/lib/Target/X86/X86InstrShiftRotate.td +++ b/llvm/lib/Target/X86/X86InstrShiftRotate.td @@ -12,7 +12,7 @@ // FIXME: Someone needs to smear multipattern goodness all over this file. -let Defs = [EFLAGS] in { +let Defs = [EFLAGS], hasSideEffects = 0 in { let Constraints = "$src1 = $dst" in { let Uses = [CL], SchedRW = [WriteShiftCL] in { @@ -50,9 +50,6 @@ [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>; } // isConvertibleToThreeAddress = 1 -// NOTE: We don't include patterns for shifts of a register by one, because -// 'add reg,reg' is cheaper (and we have a Pat pattern for shift-by-one). -let hasSideEffects = 0 in { def SHL8r1 : I<0xD0, MRM4r, (outs GR8:$dst), (ins GR8:$src1), "shl{b}\t$dst", []>; def SHL16r1 : I<0xD1, MRM4r, (outs GR16:$dst), (ins GR16:$src1), @@ -61,7 +58,6 @@ "shl{l}\t$dst", []>, OpSize32; def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1), "shl{q}\t$dst", []>; -} // hasSideEffects = 0 } // SchedRW } // Constraints = "$src = $dst" @@ -85,7 +81,7 @@ Requires<[In64BitMode]>; } // Uses, SchedRW -let SchedRW = [WriteShiftLd, WriteRMW] in { +let SchedRW = [WriteShiftLd, WriteRMW], mayLoad = 1, mayStore = 1 in { def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, u8imm:$src), "shl{b}\t{$src, $dst|$dst, $src}", [(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>; @@ -104,21 +100,14 @@ // Shift by 1 def SHL8m1 : I<0xD0, MRM4m, (outs), (ins i8mem :$dst), - "shl{b}\t$dst", - [(store (shl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; + "shl{b}\t$dst", []>; def SHL16m1 : I<0xD1, MRM4m, (outs), (ins i16mem:$dst), - "shl{w}\t$dst", - [(store (shl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>, - OpSize16; + "shl{w}\t$dst", []>, OpSize16; def SHL32m1 : I<0xD1, MRM4m, (outs), (ins i32mem:$dst), - "shl{l}\t$dst", - [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>, - OpSize32; + "shl{l}\t$dst", []>, OpSize32; def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst), - "shl{q}\t$dst", - [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>, - Requires<[In64BitMode]>; -} // SchedRW + "shl{q}\t$dst", []>, Requires<[In64BitMode]>; +} // SchedRW, mayLoad, mayStore let Constraints = "$src1 = $dst" in { let Uses = [CL], SchedRW = [WriteShiftCL] in { @@ -154,17 +143,13 @@ // Shift right by 1 def SHR8r1 : I<0xD0, MRM5r, (outs GR8:$dst), (ins GR8:$src1), - "shr{b}\t$dst", - [(set GR8:$dst, (srl GR8:$src1, (i8 1)))]>; + "shr{b}\t$dst", []>; def SHR16r1 : I<0xD1, MRM5r, (outs GR16:$dst), (ins GR16:$src1), - "shr{w}\t$dst", - [(set GR16:$dst, (srl GR16:$src1, (i8 1)))]>, OpSize16; + "shr{w}\t$dst", []>, OpSize16; def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1), - 
"shr{l}\t$dst", - [(set GR32:$dst, (srl GR32:$src1, (i8 1)))]>, OpSize32; + "shr{l}\t$dst", []>, OpSize32; def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1), - "shr{q}\t$dst", - [(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>; + "shr{q}\t$dst", []>; } // SchedRW } // Constraints = "$src = $dst" @@ -187,7 +172,7 @@ Requires<[In64BitMode]>; } // Uses, SchedRW -let SchedRW = [WriteShiftLd, WriteRMW] in { +let SchedRW = [WriteShiftLd, WriteRMW], mayLoad = 1, mayStore = 1 in { def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, u8imm:$src), "shr{b}\t{$src, $dst|$dst, $src}", [(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>; @@ -206,21 +191,15 @@ // Shift by 1 def SHR8m1 : I<0xD0, MRM5m, (outs), (ins i8mem :$dst), - "shr{b}\t$dst", - [(store (srl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; + "shr{b}\t$dst", []>; def SHR16m1 : I<0xD1, MRM5m, (outs), (ins i16mem:$dst), - "shr{w}\t$dst", - [(store (srl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>, - OpSize16; + "shr{w}\t$dst", []>, OpSize16; def SHR32m1 : I<0xD1, MRM5m, (outs), (ins i32mem:$dst), - "shr{l}\t$dst", - [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>, - OpSize32; + "shr{l}\t$dst", []>, OpSize32; def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst), - "shr{q}\t$dst", - [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>, - Requires<[In64BitMode]>; -} // SchedRW + "shr{q}\t$dst", []>, Requires<[In64BitMode]>; +} // SchedRW, mayLoad, mayStore + let Constraints = "$src1 = $dst" in { let Uses = [CL], SchedRW = [WriteShiftCL] in { @@ -259,17 +238,13 @@ // Shift by 1 def SAR8r1 : I<0xD0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1), - "sar{b}\t$dst", - [(set GR8:$dst, (sra GR8:$src1, (i8 1)))]>; + "sar{b}\t$dst", []>; def SAR16r1 : I<0xD1, MRM7r, (outs GR16:$dst), (ins GR16:$src1), - "sar{w}\t$dst", - [(set GR16:$dst, (sra GR16:$src1, (i8 1)))]>, OpSize16; + "sar{w}\t$dst", []>, OpSize16; def SAR32r1 : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1), - "sar{l}\t$dst", - [(set GR32:$dst, (sra GR32:$src1, (i8 1)))]>, OpSize32; + "sar{l}\t$dst", []>, OpSize32; def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1), - "sar{q}\t$dst", - [(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>; + "sar{q}\t$dst", []>; } // SchedRW } // Constraints = "$src = $dst" @@ -292,7 +267,7 @@ Requires<[In64BitMode]>; } // Uses, SchedRW -let SchedRW = [WriteShiftLd, WriteRMW] in { +let SchedRW = [WriteShiftLd, WriteRMW], mayLoad = 1, mayStore = 1 in { def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, u8imm:$src), "sar{b}\t{$src, $dst|$dst, $src}", [(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>; @@ -311,27 +286,19 @@ // Shift by 1 def SAR8m1 : I<0xD0, MRM7m, (outs), (ins i8mem :$dst), - "sar{b}\t$dst", - [(store (sra (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; + "sar{b}\t$dst", []>; def SAR16m1 : I<0xD1, MRM7m, (outs), (ins i16mem:$dst), - "sar{w}\t$dst", - [(store (sra (loadi16 addr:$dst), (i8 1)), addr:$dst)]>, - OpSize16; + "sar{w}\t$dst", []>, OpSize16; def SAR32m1 : I<0xD1, MRM7m, (outs), (ins i32mem:$dst), - "sar{l}\t$dst", - [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)]>, - OpSize32; + "sar{l}\t$dst", []>, OpSize32; def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst), - "sar{q}\t$dst", - [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)]>, - Requires<[In64BitMode]>; + "sar{q}\t$dst", []>, Requires<[In64BitMode]>; } // SchedRW //===----------------------------------------------------------------------===// // Rotate instructions 
//===----------------------------------------------------------------------===// -let hasSideEffects = 0 in { let Constraints = "$src1 = $dst" in { let Uses = [CL, EFLAGS], SchedRW = [WriteRotateCL] in { @@ -393,7 +360,6 @@ def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, u8imm:$cnt), "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; } // Uses = [EFLAGS], SchedRW - } // Constraints = "$src = $dst" let mayLoad = 1, mayStore = 1 in { @@ -457,7 +423,6 @@ Requires<[In64BitMode]>; } // Uses = [CL, EFLAGS], SchedRW } // mayLoad, mayStore -} // hasSideEffects = 0 let Constraints = "$src1 = $dst" in { // FIXME: provide shorter instructions when imm8 == 1 @@ -495,17 +460,13 @@ // Rotate by 1 def ROL8r1 : I<0xD0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), - "rol{b}\t$dst", - [(set GR8:$dst, (rotl GR8:$src1, (i8 1)))]>; + "rol{b}\t$dst", []>; def ROL16r1 : I<0xD1, MRM0r, (outs GR16:$dst), (ins GR16:$src1), - "rol{w}\t$dst", - [(set GR16:$dst, (rotl GR16:$src1, (i8 1)))]>, OpSize16; + "rol{w}\t$dst", []>, OpSize16; def ROL32r1 : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1), - "rol{l}\t$dst", - [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>, OpSize32; + "rol{l}\t$dst", []>, OpSize32; def ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1), - "rol{q}\t$dst", - [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>; + "rol{q}\t$dst", []>; } // SchedRW } // Constraints = "$src = $dst" @@ -525,7 +486,7 @@ Requires<[In64BitMode]>; } // Uses, SchedRW -let SchedRW = [WriteRotateLd, WriteRMW] in { +let SchedRW = [WriteRotateLd, WriteRMW], mayLoad = 1, mayStore = 1 in { def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, u8imm:$src1), "rol{b}\t{$src1, $dst|$dst, $src1}", [(store (rotl (loadi8 addr:$dst), (i8 imm:$src1)), addr:$dst)]>; @@ -544,21 +505,14 @@ // Rotate by 1 def ROL8m1 : I<0xD0, MRM0m, (outs), (ins i8mem :$dst), - "rol{b}\t$dst", - [(store (rotl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; + "rol{b}\t$dst", []>; def ROL16m1 : I<0xD1, MRM0m, (outs), (ins i16mem:$dst), - "rol{w}\t$dst", - [(store (rotl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>, - OpSize16; + "rol{w}\t$dst", []>, OpSize16; def ROL32m1 : I<0xD1, MRM0m, (outs), (ins i32mem:$dst), - "rol{l}\t$dst", - [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>, - OpSize32; + "rol{l}\t$dst", []>, OpSize32; def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst), - "rol{q}\t$dst", - [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>, - Requires<[In64BitMode]>; -} // SchedRW + "rol{q}\t$dst", []>, Requires<[In64BitMode]>; +} // SchedRW, mayLoad, mayStore let Constraints = "$src1 = $dst" in { let Uses = [CL], SchedRW = [WriteRotateCL] in { @@ -595,17 +549,13 @@ // Rotate by 1 def ROR8r1 : I<0xD0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), - "ror{b}\t$dst", - [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))]>; + "ror{b}\t$dst", []>; def ROR16r1 : I<0xD1, MRM1r, (outs GR16:$dst), (ins GR16:$src1), - "ror{w}\t$dst", - [(set GR16:$dst, (rotr GR16:$src1, (i8 1)))]>, OpSize16; + "ror{w}\t$dst", []>, OpSize16; def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1), - "ror{l}\t$dst", - [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>, OpSize32; + "ror{l}\t$dst", []>, OpSize32; def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1), - "ror{q}\t$dst", - [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>; + "ror{q}\t$dst", []>; } // SchedRW } // Constraints = "$src = $dst", SchedRW @@ -625,7 +575,7 @@ Requires<[In64BitMode]>; } // Uses, SchedRW -let SchedRW = [WriteRotateLd, WriteRMW] in { +let SchedRW = [WriteRotateLd, WriteRMW], 
mayLoad = 1, mayStore =1 in { def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, u8imm:$src), "ror{b}\t{$src, $dst|$dst, $src}", [(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>; @@ -644,21 +594,15 @@ // Rotate by 1 def ROR8m1 : I<0xD0, MRM1m, (outs), (ins i8mem :$dst), - "ror{b}\t$dst", - [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; + "ror{b}\t$dst", []>; def ROR16m1 : I<0xD1, MRM1m, (outs), (ins i16mem:$dst), - "ror{w}\t$dst", - [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)]>, - OpSize16; + "ror{w}\t$dst", []>, OpSize16; def ROR32m1 : I<0xD1, MRM1m, (outs), (ins i32mem:$dst), - "ror{l}\t$dst", - [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>, + "ror{l}\t$dst", []>, OpSize32; def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst), - "ror{q}\t$dst", - [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>, - Requires<[In64BitMode]>; -} // SchedRW + "ror{q}\t$dst", []>, Requires<[In64BitMode]>; +} // SchedRW, mayLoad, mayStore //===----------------------------------------------------------------------===// @@ -816,35 +760,7 @@ TB; } // SchedRW -} // Defs = [EFLAGS] - -// Use the opposite rotate if allows us to use the rotate by 1 instruction. -def : Pat<(rotl GR8:$src1, (i8 7)), (ROR8r1 GR8:$src1)>; -def : Pat<(rotl GR16:$src1, (i8 15)), (ROR16r1 GR16:$src1)>; -def : Pat<(rotl GR32:$src1, (i8 31)), (ROR32r1 GR32:$src1)>; -def : Pat<(rotl GR64:$src1, (i8 63)), (ROR64r1 GR64:$src1)>; -def : Pat<(rotr GR8:$src1, (i8 7)), (ROL8r1 GR8:$src1)>; -def : Pat<(rotr GR16:$src1, (i8 15)), (ROL16r1 GR16:$src1)>; -def : Pat<(rotr GR32:$src1, (i8 31)), (ROL32r1 GR32:$src1)>; -def : Pat<(rotr GR64:$src1, (i8 63)), (ROL64r1 GR64:$src1)>; - -def : Pat<(store (rotl (loadi8 addr:$dst), (i8 7)), addr:$dst), - (ROR8m1 addr:$dst)>; -def : Pat<(store (rotl (loadi16 addr:$dst), (i8 15)), addr:$dst), - (ROR16m1 addr:$dst)>; -def : Pat<(store (rotl (loadi32 addr:$dst), (i8 31)), addr:$dst), - (ROR32m1 addr:$dst)>; -def : Pat<(store (rotl (loadi64 addr:$dst), (i8 63)), addr:$dst), - (ROR64m1 addr:$dst)>, Requires<[In64BitMode]>; - -def : Pat<(store (rotr (loadi8 addr:$dst), (i8 7)), addr:$dst), - (ROL8m1 addr:$dst)>; -def : Pat<(store (rotr (loadi16 addr:$dst), (i8 15)), addr:$dst), - (ROL16m1 addr:$dst)>; -def : Pat<(store (rotr (loadi32 addr:$dst), (i8 31)), addr:$dst), - (ROL32m1 addr:$dst)>; -def : Pat<(store (rotr (loadi64 addr:$dst), (i8 63)), addr:$dst), - (ROL64m1 addr:$dst)>, Requires<[In64BitMode]>; +} // Defs = [EFLAGS], hasSideEffects // Sandy Bridge and newer Intel processors support faster rotates using // SHLD to avoid a partial flag update on the normal rotate instructions. diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -505,6 +505,11 @@ if (X86::optimizeInstFromVEX3ToVEX2(OutMI)) return; + X86::optimizeRotateWithImmediate(OutMI); + + if (X86::optimizeShiftRotateWithImmediateOne(OutMI)) + return; + // Handle a few special cases to eliminate operand modifiers. 
switch (OutMI.getOpcode()) { case X86::LEA64_32r: diff --git a/llvm/test/CodeGen/X86/GlobalISel/ashr-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/ashr-scalar.ll --- a/llvm/test/CodeGen/X86/GlobalISel/ashr-scalar.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/ashr-scalar.ll @@ -167,7 +167,8 @@ ; X64-NEXT: movl %edi, %eax ; X64-NEXT: shlb $7, %al ; X64-NEXT: sarb $7, %al -; X64-NEXT: sarb %al +; X64-NEXT: movb $1, %cl +; X64-NEXT: sarb %cl, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %a = trunc i32 %arg1 to i1 diff --git a/llvm/test/CodeGen/X86/GlobalISel/lshr-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/lshr-scalar.ll --- a/llvm/test/CodeGen/X86/GlobalISel/lshr-scalar.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/lshr-scalar.ll @@ -165,7 +165,8 @@ ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax ; X64-NEXT: andb $1, %al -; X64-NEXT: shrb %al +; X64-NEXT: movb $1, %cl +; X64-NEXT: shrb %cl, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %a = trunc i32 %arg1 to i1 diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-ashr-scalar.mir b/llvm/test/CodeGen/X86/GlobalISel/select-ashr-scalar.mir --- a/llvm/test/CodeGen/X86/GlobalISel/select-ashr-scalar.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/select-ashr-scalar.mir @@ -12,11 +12,6 @@ ret i64 %res } - define i64 @test_ashr_i64_imm1(i64 %arg1) { - %res = ashr i64 %arg1, 1 - ret i64 %res - } - define i32 @test_ashr_i32(i32 %arg1, i32 %arg2) { %res = ashr i32 %arg1, %arg2 ret i32 %res @@ -27,11 +22,6 @@ ret i32 %res } - define i32 @test_ashr_i32_imm1(i32 %arg1) { - %res = ashr i32 %arg1, 1 - ret i32 %res - } - define i16 @test_ashr_i16(i32 %arg1, i32 %arg2) { %a = trunc i32 %arg1 to i16 %a2 = trunc i32 %arg2 to i16 @@ -45,12 +35,6 @@ ret i16 %res } - define i16 @test_ashr_i16_imm1(i32 %arg1) { - %a = trunc i32 %arg1 to i16 - %res = ashr i16 %a, 1 - ret i16 %res - } - define i8 @test_ashr_i8(i32 %arg1, i32 %arg2) { %a = trunc i32 %arg1 to i8 %a2 = trunc i32 %arg2 to i8 @@ -63,12 +47,6 @@ %res = ashr i8 %a, 5 ret i8 %res } - - define i8 @test_ashr_i8_imm1(i32 %arg1) { - %a = trunc i32 %arg1 to i8 - %res = ashr i8 %a, 1 - ret i8 %res - } ... --- name: test_ashr_i64 @@ -136,37 +114,6 @@ $rax = COPY %2(s64) RET 0, implicit $rax -... ---- -name: test_ashr_i64_imm1 -alignment: 16 -legalized: true -regBankSelected: true -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } -liveins: -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $rdi - - ; ALL-LABEL: name: test_ashr_i64_imm1 - ; ALL: liveins: $rdi - ; ALL: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; ALL: [[SAR64r1_:%[0-9]+]]:gr64 = SAR64r1 [[COPY]], implicit-def $eflags - ; ALL: $rax = COPY [[SAR64r1_]] - ; ALL: RET 0, implicit $rax - %0(s64) = COPY $rdi - %1(s8) = G_CONSTANT i8 1 - %2(s64) = G_ASHR %0, %1 - $rax = COPY %2(s64) - RET 0, implicit $rax - ... --- name: test_ashr_i32 @@ -234,37 +181,6 @@ $eax = COPY %2(s32) RET 0, implicit $eax -... 
---- -name: test_ashr_i32_imm1 -alignment: 16 -legalized: true -regBankSelected: true -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } -liveins: -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi - - ; ALL-LABEL: name: test_ashr_i32_imm1 - ; ALL: liveins: $edi - ; ALL: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; ALL: [[SAR32r1_:%[0-9]+]]:gr32 = SAR32r1 [[COPY]], implicit-def $eflags - ; ALL: $eax = COPY [[SAR32r1_]] - ; ALL: RET 0, implicit $eax - %0(s32) = COPY $edi - %1(s8) = G_CONSTANT i8 1 - %2(s32) = G_ASHR %0, %1 - $eax = COPY %2(s32) - RET 0, implicit $eax - ... --- name: test_ashr_i16 @@ -338,40 +254,6 @@ $ax = COPY %3(s16) RET 0, implicit $ax -... ---- -name: test_ashr_i16_imm1 -alignment: 16 -legalized: true -regBankSelected: true -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } -liveins: -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi - - ; ALL-LABEL: name: test_ashr_i16_imm1 - ; ALL: liveins: $edi - ; ALL: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; ALL: [[COPY1:%[0-9]+]]:gr16 = COPY [[COPY]].sub_16bit - ; ALL: [[SAR16r1_:%[0-9]+]]:gr16 = SAR16r1 [[COPY1]], implicit-def $eflags - ; ALL: $ax = COPY [[SAR16r1_]] - ; ALL: RET 0, implicit $ax - %0(s32) = COPY $edi - %2(s8) = G_CONSTANT i8 1 - %1(s16) = G_TRUNC %0(s32) - %3(s16) = G_ASHR %1, %2 - $ax = COPY %3(s16) - RET 0, implicit $ax - ... --- name: test_ashr_i8 @@ -446,37 +328,3 @@ RET 0, implicit $al ... ---- -name: test_ashr_i8_imm1 -alignment: 16 -legalized: true -regBankSelected: true -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } -liveins: -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi - - ; ALL-LABEL: name: test_ashr_i8_imm1 - ; ALL: liveins: $edi - ; ALL: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; ALL: [[COPY1:%[0-9]+]]:gr8 = COPY [[COPY]].sub_8bit - ; ALL: [[SAR8r1_:%[0-9]+]]:gr8 = SAR8r1 [[COPY1]], implicit-def $eflags - ; ALL: $al = COPY [[SAR8r1_]] - ; ALL: RET 0, implicit $al - %0(s32) = COPY $edi - %2(s8) = G_CONSTANT i8 1 - %1(s8) = G_TRUNC %0(s32) - %3(s8) = G_ASHR %1, %2 - $al = COPY %3(s8) - RET 0, implicit $al - -... 
diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-lshr-scalar.mir b/llvm/test/CodeGen/X86/GlobalISel/select-lshr-scalar.mir --- a/llvm/test/CodeGen/X86/GlobalISel/select-lshr-scalar.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/select-lshr-scalar.mir @@ -12,11 +12,6 @@ ret i64 %res } - define i64 @test_lshr_i64_imm1(i64 %arg1) { - %res = lshr i64 %arg1, 1 - ret i64 %res - } - define i32 @test_lshr_i32(i32 %arg1, i32 %arg2) { %res = lshr i32 %arg1, %arg2 ret i32 %res @@ -27,11 +22,6 @@ ret i32 %res } - define i32 @test_lshr_i32_imm1(i32 %arg1) { - %res = lshr i32 %arg1, 1 - ret i32 %res - } - define i16 @test_lshr_i16(i32 %arg1, i32 %arg2) { %a = trunc i32 %arg1 to i16 %a2 = trunc i32 %arg2 to i16 @@ -45,12 +35,6 @@ ret i16 %res } - define i16 @test_lshr_i16_imm1(i32 %arg1) { - %a = trunc i32 %arg1 to i16 - %res = lshr i16 %a, 1 - ret i16 %res - } - define i8 @test_lshr_i8(i32 %arg1, i32 %arg2) { %a = trunc i32 %arg1 to i8 %a2 = trunc i32 %arg2 to i8 @@ -64,11 +48,6 @@ ret i8 %res } - define i8 @test_lshr_i8_imm1(i32 %arg1) { - %a = trunc i32 %arg1 to i8 - %res = lshr i8 %a, 1 - ret i8 %res - } ... --- name: test_lshr_i64 @@ -136,37 +115,6 @@ $rax = COPY %2(s64) RET 0, implicit $rax -... ---- -name: test_lshr_i64_imm1 -alignment: 16 -legalized: true -regBankSelected: true -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } -liveins: -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $rdi - - ; ALL-LABEL: name: test_lshr_i64_imm1 - ; ALL: liveins: $rdi - ; ALL: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; ALL: [[SHR64r1_:%[0-9]+]]:gr64 = SHR64r1 [[COPY]], implicit-def $eflags - ; ALL: $rax = COPY [[SHR64r1_]] - ; ALL: RET 0, implicit $rax - %0(s64) = COPY $rdi - %1(s8) = G_CONSTANT i8 1 - %2(s64) = G_LSHR %0, %1 - $rax = COPY %2(s64) - RET 0, implicit $rax - ... --- name: test_lshr_i32 @@ -234,37 +182,6 @@ $eax = COPY %2(s32) RET 0, implicit $eax -... ---- -name: test_lshr_i32_imm1 -alignment: 16 -legalized: true -regBankSelected: true -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } -liveins: -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi - - ; ALL-LABEL: name: test_lshr_i32_imm1 - ; ALL: liveins: $edi - ; ALL: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; ALL: [[SHR32r1_:%[0-9]+]]:gr32 = SHR32r1 [[COPY]], implicit-def $eflags - ; ALL: $eax = COPY [[SHR32r1_]] - ; ALL: RET 0, implicit $eax - %0(s32) = COPY $edi - %1(s8) = G_CONSTANT i8 1 - %2(s32) = G_LSHR %0, %1 - $eax = COPY %2(s32) - RET 0, implicit $eax - ... --- name: test_lshr_i16 @@ -338,40 +255,6 @@ $ax = COPY %3(s16) RET 0, implicit $ax -... 
---- -name: test_lshr_i16_imm1 -alignment: 16 -legalized: true -regBankSelected: true -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } -liveins: -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi - - ; ALL-LABEL: name: test_lshr_i16_imm1 - ; ALL: liveins: $edi - ; ALL: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; ALL: [[COPY1:%[0-9]+]]:gr16 = COPY [[COPY]].sub_16bit - ; ALL: [[SHR16r1_:%[0-9]+]]:gr16 = SHR16r1 [[COPY1]], implicit-def $eflags - ; ALL: $ax = COPY [[SHR16r1_]] - ; ALL: RET 0, implicit $ax - %0(s32) = COPY $edi - %2(s8) = G_CONSTANT i8 1 - %1(s16) = G_TRUNC %0(s32) - %3(s16) = G_LSHR %1, %2 - $ax = COPY %3(s16) - RET 0, implicit $ax - ... --- name: test_lshr_i8 @@ -446,37 +329,3 @@ RET 0, implicit $al ... ---- -name: test_lshr_i8_imm1 -alignment: 16 -legalized: true -regBankSelected: true -tracksRegLiveness: true -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: gpr, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } -liveins: -fixedStack: -stack: -constants: -body: | - bb.1 (%ir-block.0): - liveins: $edi - - ; ALL-LABEL: name: test_lshr_i8_imm1 - ; ALL: liveins: $edi - ; ALL: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; ALL: [[COPY1:%[0-9]+]]:gr8 = COPY [[COPY]].sub_8bit - ; ALL: [[SHR8r1_:%[0-9]+]]:gr8 = SHR8r1 [[COPY1]], implicit-def $eflags - ; ALL: $al = COPY [[SHR8r1_]] - ; ALL: RET 0, implicit $al - %0(s32) = COPY $edi - %2(s8) = G_CONSTANT i8 1 - %1(s8) = G_TRUNC %0(s32) - %3(s8) = G_LSHR %1, %2 - $al = COPY %3(s8) - RET 0, implicit $al - -... 
diff --git a/llvm/test/CodeGen/X86/fast-isel-shift.ll b/llvm/test/CodeGen/X86/fast-isel-shift.ll --- a/llvm/test/CodeGen/X86/fast-isel-shift.ll +++ b/llvm/test/CodeGen/X86/fast-isel-shift.ll @@ -199,7 +199,7 @@ ; CHECK-LABEL: lshr_imm1_i8: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: shrb $1, %al +; CHECK-NEXT: shrb %al ; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %c = lshr i8 %a, 1 @@ -210,7 +210,7 @@ ; CHECK-LABEL: lshr_imm1_i16: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: shrw $1, %ax +; CHECK-NEXT: shrw %ax ; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %c = lshr i16 %a, 1 @@ -221,7 +221,7 @@ ; CHECK-LABEL: lshr_imm1_i32: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: shrl $1, %eax +; CHECK-NEXT: shrl %eax ; CHECK-NEXT: retq %c = lshr i32 %a, 1 ret i32 %c @@ -231,7 +231,7 @@ ; CHECK-LABEL: lshr_imm1_i64: ; CHECK: ## %bb.0: ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: shrq $1, %rax +; CHECK-NEXT: shrq %rax ; CHECK-NEXT: retq %c = lshr i64 %a, 1 ret i64 %c @@ -241,7 +241,7 @@ ; CHECK-LABEL: ashr_imm1_i8: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: sarb $1, %al +; CHECK-NEXT: sarb %al ; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %c = ashr i8 %a, 1 @@ -252,7 +252,7 @@ ; CHECK-LABEL: ashr_imm1_i16: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: sarw $1, %ax +; CHECK-NEXT: sarw %ax ; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %c = ashr i16 %a, 1 @@ -263,7 +263,7 @@ ; CHECK-LABEL: ashr_imm1_i32: ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: sarl $1, %eax +; CHECK-NEXT: sarl %eax ; CHECK-NEXT: retq %c = ashr i32 %a, 1 ret i32 %c @@ -273,7 +273,7 @@ ; CHECK-LABEL: ashr_imm1_i64: ; CHECK: ## %bb.0: ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: sarq $1, %rax +; CHECK-NEXT: sarq %rax ; CHECK-NEXT: retq %c = ashr i64 %a, 1 ret i64 %c diff --git a/llvm/test/CodeGen/X86/gpr-to-mask.ll b/llvm/test/CodeGen/X86/gpr-to-mask.ll --- a/llvm/test/CodeGen/X86/gpr-to-mask.ll +++ b/llvm/test/CodeGen/X86/gpr-to-mask.ll @@ -320,13 +320,12 @@ ; X86-64-NEXT: testb $1, %dil ; X86-64-NEXT: je .LBB6_2 ; X86-64-NEXT: # %bb.1: # %if -; X86-64-NEXT: movzbl (%rsi), %eax -; X86-64-NEXT: shrb %al +; X86-64-NEXT: kmovb (%rsi), %k0 +; X86-64-NEXT: kshiftrb $1, %k0, %k1 ; X86-64-NEXT: jmp .LBB6_3 ; X86-64-NEXT: .LBB6_2: # %else -; X86-64-NEXT: movzbl (%rdx), %eax +; X86-64-NEXT: kmovb (%rdx), %k1 ; X86-64-NEXT: .LBB6_3: # %exit -; X86-64-NEXT: kmovd %eax, %k1 ; X86-64-NEXT: vmovaps %zmm0, %zmm1 {%k1} ; X86-64-NEXT: vmovaps %ymm1, (%rcx) ; X86-64-NEXT: vzeroupper @@ -341,14 +340,13 @@ ; X86-32-NEXT: je .LBB6_2 ; X86-32-NEXT: # %bb.1: # %if ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-32-NEXT: movzbl (%ecx), %ecx -; X86-32-NEXT: shrb %cl +; X86-32-NEXT: kmovb (%ecx), %k0 +; X86-32-NEXT: kshiftrb $1, %k0, %k1 ; X86-32-NEXT: jmp .LBB6_3 ; X86-32-NEXT: .LBB6_2: # %else ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-32-NEXT: movzbl (%ecx), %ecx +; X86-32-NEXT: kmovb (%ecx), %k1 ; X86-32-NEXT: .LBB6_3: # %exit -; X86-32-NEXT: kmovd %ecx, %k1 ; X86-32-NEXT: vmovaps %zmm0, %zmm1 {%k1} ; X86-32-NEXT: vmovaps %ymm1, (%eax) ; X86-32-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/is_fpclass.ll b/llvm/test/CodeGen/X86/is_fpclass.ll --- a/llvm/test/CodeGen/X86/is_fpclass.ll +++ b/llvm/test/CodeGen/X86/is_fpclass.ll @@ -1411,7 +1411,7 @@ ; CHECK-64-LABEL: iszero_d_strictfp: ; CHECK-64: # %bb.0: # %entry ; 
CHECK-64-NEXT: movq %xmm0, %rax -; CHECK-64-NEXT: shlq $1, %rax +; CHECK-64-NEXT: shlq %rax ; CHECK-64-NEXT: testq %rax, %rax ; CHECK-64-NEXT: sete %al ; CHECK-64-NEXT: retq
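Note (reviewer addition, not part of the patch): the MC-level rewrites above lean on two facts. First, the x86 shift/rotate-by-1 forms (opcodes D0/D1) omit the immediate byte carried by the C0/C1 imm8 forms, so shrinking e.g. `SHL32ri $1` to `SHL32r1` at lowering time saves one byte per instruction, which is why the ISel patterns and extra test coverage for the *r1/*m1 opcodes can go away. Second, `optimizeRotateWithImmediate` assumes that rotating by (width - 1) equals rotating by 1 in the opposite direction, the same identity the deleted `Pat<>` entries expressed at ISel time. Below is a minimal standalone sketch checking that identity exhaustively for the 8-bit case; the 16/32/64-bit cases with immediates 15/31/63 follow the same argument. It uses hand-rolled rotate helpers and has no LLVM dependencies.

```cpp
// Standalone check of the rotate identity used by optimizeRotateWithImmediate:
//   rotl(x, width - 1) == rotr(x, 1)   and   rotr(x, width - 1) == rotl(x, 1)
// Shown here for width == 8; not part of the patch.
#include <cassert>
#include <cstdint>
#include <cstdio>

static uint8_t rotl8(uint8_t V, unsigned N) {
  N &= 7;
  return N ? (uint8_t)((V << N) | (V >> (8 - N))) : V;
}

static uint8_t rotr8(uint8_t V, unsigned N) {
  N &= 7;
  return N ? (uint8_t)((V >> N) | (V << (8 - N))) : V;
}

int main() {
  // Exhaustive over all 8-bit values.
  for (unsigned V = 0; V < 256; ++V) {
    assert(rotl8((uint8_t)V, 7) == rotr8((uint8_t)V, 1));
    assert(rotr8((uint8_t)V, 7) == rotl8((uint8_t)V, 1));
  }
  puts("rotate by (width-1) == opposite rotate by 1 for all 8-bit values");
  return 0;
}
```

The same loop can be widened to uint16_t/uint32_t/uint64_t by swapping the type and the width constant.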