diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -33,10 +33,11 @@
       /// Bit scan reverse.
       BSR,
 
-      /// Double shift instructions. These correspond to
-      /// X86::SHLDxx and X86::SHRDxx instructions.
-      SHLD,
-      SHRD,
+      /// X86 funnel/double shift i16 instructions. These correspond to
+      /// X86::SHLDWx and X86::SHRDWx instructions which have different amt
+      /// modulo rules to generic funnel shifts.
+      FSHL,
+      FSHR,
 
       /// Bitwise logical AND of floating point values. This corresponds
       /// to X86::ANDPS or X86::ANDPD.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -207,10 +207,13 @@
 
   // Funnel shifts.
   for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
+    // For slow shld targets we only lower for code size.
+    LegalizeAction ShiftDoubleAction = Subtarget.isSHLDSlow() ? Custom : Legal;
+
     setOperationAction(ShiftOp             , MVT::i16  , Custom);
-    setOperationAction(ShiftOp             , MVT::i32  , Custom);
+    setOperationAction(ShiftOp             , MVT::i32  , ShiftDoubleAction);
     if (Subtarget.is64Bit())
-      setOperationAction(ShiftOp           , MVT::i64  , Custom);
+      setOperationAction(ShiftOp           , MVT::i64  , ShiftDoubleAction);
   }
 
   if (!Subtarget.useSoftFloat()) {
@@ -18836,16 +18839,15 @@
   if (!OptForSize && Subtarget.isSHLDSlow())
     return SDValue();
 
-  if (IsFSHR)
-    std::swap(Op0, Op1);
-
   // i16 needs to modulo the shift amount, but i32/i64 have implicit modulo.
-  if (VT == MVT::i16)
+  if (VT == MVT::i16) {
     Amt = DAG.getNode(ISD::AND, DL, Amt.getValueType(), Amt,
                       DAG.getConstant(15, DL, Amt.getValueType()));
+    unsigned FSHOp = (IsFSHR ? X86ISD::FSHR : X86ISD::FSHL);
+    return DAG.getNode(FSHOp, DL, VT, Op0, Op1, Amt);
+  }
 
-  unsigned SHDOp = (IsFSHR ? X86ISD::SHRD : X86ISD::SHLD);
-  return DAG.getNode(SHDOp, DL, VT, Op0, Op1, Amt);
+  return Op;
 }
 
 // Try to use a packed vector operation to handle i64 on 32-bit targets when
@@ -29939,8 +29941,8 @@
 #define NODE_NAME_CASE(NODE) case X86ISD::NODE: return "X86ISD::" #NODE;
   NODE_NAME_CASE(BSF)
   NODE_NAME_CASE(BSR)
-  NODE_NAME_CASE(SHLD)
-  NODE_NAME_CASE(SHRD)
+  NODE_NAME_CASE(FSHL)
+  NODE_NAME_CASE(FSHR)
   NODE_NAME_CASE(FAND)
   NODE_NAME_CASE(FANDN)
   NODE_NAME_CASE(FOR)
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1782,21 +1782,24 @@
 defm : MaskedRotateAmountPats<rotl, "ROL">;
 defm : MaskedRotateAmountPats<rotr, "ROR">;
 
-// Double shift amount is implicitly masked.
-multiclass MaskedDoubleShiftAmountPats<SDNode frag, string name> {
-  // (shift x (and y, 31)) ==> (shift x, y)
-  def : Pat<(frag GR16:$src1, GR16:$src2, (shiftMask32 CL)),
-            (!cast<Instruction>(name # "16rrCL") GR16:$src1, GR16:$src2)>;
-  def : Pat<(frag GR32:$src1, GR32:$src2, (shiftMask32 CL)),
-            (!cast<Instruction>(name # "32rrCL") GR32:$src1, GR32:$src2)>;
-
-  // (shift x (and y, 63)) ==> (shift x, y)
-  def : Pat<(frag GR64:$src1, GR64:$src2, (shiftMask32 CL)),
-            (!cast<Instruction>(name # "64rrCL") GR64:$src1, GR64:$src2)>;
-}
-
-defm : MaskedDoubleShiftAmountPats<X86shld, "SHLD">;
-defm : MaskedDoubleShiftAmountPats<X86shrd, "SHRD">;
+// Double "funnel" shift amount is implicitly masked.
+// (fshl/fshr x (and y, 31)) ==> (fshl/fshr x, y) (NOTE: modulo32)
+def : Pat<(X86fshl GR16:$src1, GR16:$src2, (shiftMask32 CL)),
+          (SHLD16rrCL GR16:$src1, GR16:$src2)>;
+def : Pat<(X86fshr GR16:$src2, GR16:$src1, (shiftMask32 CL)),
+          (SHRD16rrCL GR16:$src1, GR16:$src2)>;
+
+// (fshl/fshr x (and y, 31)) ==> (fshl/fshr x, y)
+def : Pat<(fshl GR32:$src1, GR32:$src2, (shiftMask32 CL)),
+          (SHLD32rrCL GR32:$src1, GR32:$src2)>;
+def : Pat<(fshr GR32:$src2, GR32:$src1, (shiftMask32 CL)),
+          (SHRD32rrCL GR32:$src1, GR32:$src2)>;
+
+// (fshl/fshr x (and y, 63)) ==> (fshl/fshr x, y)
+def : Pat<(fshl GR64:$src1, GR64:$src2, (shiftMask64 CL)),
+          (SHLD64rrCL GR64:$src1, GR64:$src2)>;
+def : Pat<(fshr GR64:$src2, GR64:$src1, (shiftMask64 CL)),
+          (SHRD64rrCL GR64:$src1, GR64:$src2)>;
 
 let Predicates = [HasBMI2] in {
   let AddedComplexity = 1 in {
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -143,8 +143,8 @@
 def X86bsf     : SDNode<"X86ISD::BSF",      SDTUnaryArithWithFlags>;
 def X86bsr     : SDNode<"X86ISD::BSR",      SDTUnaryArithWithFlags>;
-def X86shld    : SDNode<"X86ISD::SHLD",     SDTIntShiftDOp>;
-def X86shrd    : SDNode<"X86ISD::SHRD",     SDTIntShiftDOp>;
+def X86fshl    : SDNode<"X86ISD::FSHL",     SDTIntShiftDOp>;
+def X86fshr    : SDNode<"X86ISD::FSHR",     SDTIntShiftDOp>;
 
 def X86cmp     : SDNode<"X86ISD::CMP" ,     SDTX86CmpTest>;
 def X86fcmp    : SDNode<"X86ISD::FCMP",     SDTX86FCmp>;
diff --git a/llvm/lib/Target/X86/X86InstrShiftRotate.td b/llvm/lib/Target/X86/X86InstrShiftRotate.td
--- a/llvm/lib/Target/X86/X86InstrShiftRotate.td
+++ b/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -661,32 +661,32 @@
 def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst),
                    (ins GR16:$src1, GR16:$src2),
                    "shld{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
-                   [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>,
+                   [(set GR16:$dst, (X86fshl GR16:$src1, GR16:$src2, CL))]>,
                    TB, OpSize16;
 def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst),
                    (ins GR16:$src1, GR16:$src2),
                    "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
-                   [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>,
+                   [(set GR16:$dst, (X86fshr GR16:$src2, GR16:$src1, CL))]>,
                    TB, OpSize16;
 def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst),
                    (ins GR32:$src1, GR32:$src2),
                    "shld{l}\t{%cl, $src2, $dst|$dst, $src2, cl}",
-                   [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))]>,
+                   [(set GR32:$dst, (fshl GR32:$src1, GR32:$src2, CL))]>,
                    TB, OpSize32;
 def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst),
                    (ins GR32:$src1, GR32:$src2),
                    "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, cl}",
-                   [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>,
+                   [(set GR32:$dst, (fshr GR32:$src2, GR32:$src1, CL))]>,
                    TB, OpSize32;
 def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst),
                     (ins GR64:$src1, GR64:$src2),
                     "shld{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
-                    [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))]>,
+                    [(set GR64:$dst, (fshl GR64:$src1, GR64:$src2, CL))]>,
                     TB;
 def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst),
                     (ins GR64:$src1, GR64:$src2),
                     "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
-                    [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))]>,
+                    [(set GR64:$dst, (fshr GR64:$src2, GR64:$src1, CL))]>,
                     TB;
 } // SchedRW
 
@@ -695,42 +695,42 @@
                      (outs GR16:$dst),
                      (ins GR16:$src1, GR16:$src2, u8imm:$src3),
                      "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                     [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2,
+                     [(set GR16:$dst, (X86fshl GR16:$src1, GR16:$src2,
                                       (i8 imm:$src3)))]>,
                      TB, OpSize16;
 def
SHRD16rri8 : Ii8<0xAC, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, u8imm:$src3), "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, + [(set GR16:$dst, (X86fshr GR16:$src2, GR16:$src1, (i8 imm:$src3)))]>, TB, OpSize16; def SHLD32rri8 : Ii8<0xA4, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, u8imm:$src3), "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, + [(set GR32:$dst, (fshl GR32:$src1, GR32:$src2, (i8 imm:$src3)))]>, TB, OpSize32; def SHRD32rri8 : Ii8<0xAC, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, u8imm:$src3), "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, + [(set GR32:$dst, (fshr GR32:$src2, GR32:$src1, (i8 imm:$src3)))]>, TB, OpSize32; def SHLD64rri8 : RIi8<0xA4, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2, u8imm:$src3), "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, + [(set GR64:$dst, (fshl GR64:$src1, GR64:$src2, (i8 imm:$src3)))]>, TB; def SHRD64rri8 : RIi8<0xAC, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2, u8imm:$src3), "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, + [(set GR64:$dst, (fshr GR64:$src2, GR64:$src1, (i8 imm:$src3)))]>, TB; } // SchedRW @@ -739,70 +739,70 @@ let Uses = [CL], SchedRW = [WriteSHDmrcl] in { def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), "shld{w}\t{%cl, $src2, $dst|$dst, $src2, cl}", - [(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL), - addr:$dst)]>, TB, OpSize16; + [(store (X86fshl (loadi16 addr:$dst), GR16:$src2, CL), + addr:$dst)]>, TB, OpSize16; def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, cl}", - [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL), - addr:$dst)]>, TB, OpSize16; + [(store (X86fshr GR16:$src2, (loadi16 addr:$dst), CL), + addr:$dst)]>, TB, OpSize16; def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), "shld{l}\t{%cl, $src2, $dst|$dst, $src2, cl}", - [(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL), + [(store (fshl (loadi32 addr:$dst), GR32:$src2, CL), addr:$dst)]>, TB, OpSize32; def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, cl}", - [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL), - addr:$dst)]>, TB, OpSize32; + [(store (fshr GR32:$src2, (loadi32 addr:$dst), CL), + addr:$dst)]>, TB, OpSize32; def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), "shld{q}\t{%cl, $src2, $dst|$dst, $src2, cl}", - [(store (X86shld (loadi64 addr:$dst), GR64:$src2, CL), - addr:$dst)]>, TB; + [(store (fshl (loadi64 addr:$dst), GR64:$src2, CL), + addr:$dst)]>, TB; def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, cl}", - [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, CL), - addr:$dst)]>, TB; + [(store (fshr GR64:$src2, (loadi64 addr:$dst), CL), + addr:$dst)]>, TB; } // SchedRW let SchedRW = [WriteSHDmri] in { def SHLD16mri8 : Ii8<0xA4, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2, u8imm:$src3), "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(store (X86shld (loadi16 addr:$dst), GR16:$src2, - (i8 imm:$src3)), addr:$dst)]>, + [(store (X86fshl (loadi16 addr:$dst), GR16:$src2, + (i8 imm:$src3)), 
addr:$dst)]>, TB, OpSize16; def SHRD16mri8 : Ii8<0xAC, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2, u8imm:$src3), "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, - (i8 imm:$src3)), addr:$dst)]>, + [(store (X86fshr GR16:$src2, (loadi16 addr:$dst), + (i8 imm:$src3)), addr:$dst)]>, TB, OpSize16; def SHLD32mri8 : Ii8<0xA4, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2, u8imm:$src3), "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(store (X86shld (loadi32 addr:$dst), GR32:$src2, - (i8 imm:$src3)), addr:$dst)]>, + [(store (fshl (loadi32 addr:$dst), GR32:$src2, + (i8 imm:$src3)), addr:$dst)]>, TB, OpSize32; def SHRD32mri8 : Ii8<0xAC, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2, u8imm:$src3), "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, - (i8 imm:$src3)), addr:$dst)]>, + [(store (fshr GR32:$src2, (loadi32 addr:$dst), + (i8 imm:$src3)), addr:$dst)]>, TB, OpSize32; def SHLD64mri8 : RIi8<0xA4, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2, u8imm:$src3), "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(store (X86shld (loadi64 addr:$dst), GR64:$src2, - (i8 imm:$src3)), addr:$dst)]>, + [(store (fshl (loadi64 addr:$dst), GR64:$src2, + (i8 imm:$src3)), addr:$dst)]>, TB; def SHRD64mri8 : RIi8<0xAC, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2, u8imm:$src3), "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, - (i8 imm:$src3)), addr:$dst)]>, + [(store (fshr GR64:$src2, (loadi64 addr:$dst), + (i8 imm:$src3)), addr:$dst)]>, TB; } // SchedRW diff --git a/llvm/test/CodeGen/X86/clear-highbits.ll b/llvm/test/CodeGen/X86/clear-highbits.ll --- a/llvm/test/CodeGen/X86/clear-highbits.ll +++ b/llvm/test/CodeGen/X86/clear-highbits.ll @@ -513,35 +513,43 @@ define i64 @clear_highbits64_c0(i64 %val, i64 %numhighbits) nounwind { ; X86-NOBMI2-LABEL: clear_highbits64_c0: ; X86-NOBMI2: # %bb.0: +; X86-NOBMI2-NEXT: pushl %esi ; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl $-1, %eax -; X86-NOBMI2-NEXT: movl $-1, %edx -; X86-NOBMI2-NEXT: shrl %cl, %edx -; X86-NOBMI2-NEXT: shrdl %cl, %eax, %eax -; X86-NOBMI2-NEXT: testb $32, %cl -; X86-NOBMI2-NEXT: je .LBB13_2 -; X86-NOBMI2-NEXT: # %bb.1: -; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: movl $-1, %esi +; X86-NOBMI2-NEXT: shrl %cl, %esi ; X86-NOBMI2-NEXT: xorl %edx, %edx -; X86-NOBMI2-NEXT: .LBB13_2: +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB13_1 +; X86-NOBMI2-NEXT: # %bb.2: +; X86-NOBMI2-NEXT: movl %esi, %edx +; X86-NOBMI2-NEXT: jmp .LBB13_3 +; X86-NOBMI2-NEXT: .LBB13_1: +; X86-NOBMI2-NEXT: movl %esi, %eax +; X86-NOBMI2-NEXT: .LBB13_3: ; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: popl %esi ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: clear_highbits64_c0: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl ; X86-BMI2-NEXT: movl $-1, %eax -; X86-BMI2-NEXT: shrxl %ecx, %eax, %edx -; X86-BMI2-NEXT: shrdl %cl, %eax, %eax -; X86-BMI2-NEXT: testb $32, %cl -; X86-BMI2-NEXT: je .LBB13_2 -; X86-BMI2-NEXT: # %bb.1: -; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: shrxl %ebx, %eax, %ecx ; X86-BMI2-NEXT: xorl %edx, %edx -; X86-BMI2-NEXT: .LBB13_2: +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: jne .LBB13_1 +; X86-BMI2-NEXT: # %bb.2: +; X86-BMI2-NEXT: movl %ecx, %edx +; X86-BMI2-NEXT: 
jmp .LBB13_3 +; X86-BMI2-NEXT: .LBB13_1: +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: .LBB13_3: ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: popl %ebx ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: clear_highbits64_c0: @@ -566,35 +574,43 @@ define i64 @clear_highbits64_c1_indexzext(i64 %val, i8 %numhighbits) nounwind { ; X86-NOBMI2-LABEL: clear_highbits64_c1_indexzext: ; X86-NOBMI2: # %bb.0: +; X86-NOBMI2-NEXT: pushl %esi ; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl $-1, %eax -; X86-NOBMI2-NEXT: movl $-1, %edx -; X86-NOBMI2-NEXT: shrl %cl, %edx -; X86-NOBMI2-NEXT: shrdl %cl, %eax, %eax -; X86-NOBMI2-NEXT: testb $32, %cl -; X86-NOBMI2-NEXT: je .LBB14_2 -; X86-NOBMI2-NEXT: # %bb.1: -; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: movl $-1, %esi +; X86-NOBMI2-NEXT: shrl %cl, %esi ; X86-NOBMI2-NEXT: xorl %edx, %edx -; X86-NOBMI2-NEXT: .LBB14_2: +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB14_1 +; X86-NOBMI2-NEXT: # %bb.2: +; X86-NOBMI2-NEXT: movl %esi, %edx +; X86-NOBMI2-NEXT: jmp .LBB14_3 +; X86-NOBMI2-NEXT: .LBB14_1: +; X86-NOBMI2-NEXT: movl %esi, %eax +; X86-NOBMI2-NEXT: .LBB14_3: ; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: popl %esi ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: clear_highbits64_c1_indexzext: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl ; X86-BMI2-NEXT: movl $-1, %eax -; X86-BMI2-NEXT: shrxl %ecx, %eax, %edx -; X86-BMI2-NEXT: shrdl %cl, %eax, %eax -; X86-BMI2-NEXT: testb $32, %cl -; X86-BMI2-NEXT: je .LBB14_2 -; X86-BMI2-NEXT: # %bb.1: -; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: shrxl %ebx, %eax, %ecx ; X86-BMI2-NEXT: xorl %edx, %edx -; X86-BMI2-NEXT: .LBB14_2: +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: jne .LBB14_1 +; X86-BMI2-NEXT: # %bb.2: +; X86-BMI2-NEXT: movl %ecx, %edx +; X86-BMI2-NEXT: jmp .LBB14_3 +; X86-BMI2-NEXT: .LBB14_1: +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: .LBB14_3: ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: popl %ebx ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: clear_highbits64_c1_indexzext: @@ -621,41 +637,49 @@ define i64 @clear_highbits64_c2_load(i64* %w, i64 %numhighbits) nounwind { ; X86-NOBMI2-LABEL: clear_highbits64_c2_load: ; X86-NOBMI2: # %bb.0: +; X86-NOBMI2-NEXT: pushl %edi ; X86-NOBMI2-NEXT: pushl %esi ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl $-1, %eax -; X86-NOBMI2-NEXT: movl $-1, %edx -; X86-NOBMI2-NEXT: shrl %cl, %edx -; X86-NOBMI2-NEXT: shrdl %cl, %eax, %eax -; X86-NOBMI2-NEXT: testb $32, %cl -; X86-NOBMI2-NEXT: je .LBB15_2 -; X86-NOBMI2-NEXT: # %bb.1: -; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: movl $-1, %edi +; X86-NOBMI2-NEXT: shrl %cl, %edi ; X86-NOBMI2-NEXT: xorl %edx, %edx -; X86-NOBMI2-NEXT: .LBB15_2: +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB15_1 +; X86-NOBMI2-NEXT: # %bb.2: +; X86-NOBMI2-NEXT: movl %edi, %edx +; X86-NOBMI2-NEXT: jmp .LBB15_3 +; X86-NOBMI2-NEXT: .LBB15_1: +; X86-NOBMI2-NEXT: movl %edi, %eax +; X86-NOBMI2-NEXT: .LBB15_3: ; X86-NOBMI2-NEXT: andl (%esi), %eax ; X86-NOBMI2-NEXT: andl 4(%esi), %edx ; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: popl %edi ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: clear_highbits64_c2_load: ; X86-BMI2: # %bb.0: +; 
X86-BMI2-NEXT: pushl %ebx ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl ; X86-BMI2-NEXT: movl $-1, %eax -; X86-BMI2-NEXT: shrxl %ecx, %eax, %edx -; X86-BMI2-NEXT: shrdl %cl, %eax, %eax -; X86-BMI2-NEXT: testb $32, %cl -; X86-BMI2-NEXT: je .LBB15_2 -; X86-BMI2-NEXT: # %bb.1: -; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: shrxl %ebx, %eax, %esi ; X86-BMI2-NEXT: xorl %edx, %edx -; X86-BMI2-NEXT: .LBB15_2: -; X86-BMI2-NEXT: andl (%esi), %eax -; X86-BMI2-NEXT: andl 4(%esi), %edx +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: jne .LBB15_1 +; X86-BMI2-NEXT: # %bb.2: +; X86-BMI2-NEXT: movl %esi, %edx +; X86-BMI2-NEXT: jmp .LBB15_3 +; X86-BMI2-NEXT: .LBB15_1: +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: .LBB15_3: +; X86-BMI2-NEXT: andl (%ecx), %eax +; X86-BMI2-NEXT: andl 4(%ecx), %edx ; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %ebx ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: clear_highbits64_c2_load: @@ -681,41 +705,49 @@ define i64 @clear_highbits64_c3_load_indexzext(i64* %w, i8 %numhighbits) nounwind { ; X86-NOBMI2-LABEL: clear_highbits64_c3_load_indexzext: ; X86-NOBMI2: # %bb.0: +; X86-NOBMI2-NEXT: pushl %edi ; X86-NOBMI2-NEXT: pushl %esi ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl $-1, %eax -; X86-NOBMI2-NEXT: movl $-1, %edx -; X86-NOBMI2-NEXT: shrl %cl, %edx -; X86-NOBMI2-NEXT: shrdl %cl, %eax, %eax -; X86-NOBMI2-NEXT: testb $32, %cl -; X86-NOBMI2-NEXT: je .LBB16_2 -; X86-NOBMI2-NEXT: # %bb.1: -; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: movl $-1, %edi +; X86-NOBMI2-NEXT: shrl %cl, %edi ; X86-NOBMI2-NEXT: xorl %edx, %edx -; X86-NOBMI2-NEXT: .LBB16_2: +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB16_1 +; X86-NOBMI2-NEXT: # %bb.2: +; X86-NOBMI2-NEXT: movl %edi, %edx +; X86-NOBMI2-NEXT: jmp .LBB16_3 +; X86-NOBMI2-NEXT: .LBB16_1: +; X86-NOBMI2-NEXT: movl %edi, %eax +; X86-NOBMI2-NEXT: .LBB16_3: ; X86-NOBMI2-NEXT: andl (%esi), %eax ; X86-NOBMI2-NEXT: andl 4(%esi), %edx ; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: popl %edi ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: clear_highbits64_c3_load_indexzext: ; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl ; X86-BMI2-NEXT: movl $-1, %eax -; X86-BMI2-NEXT: shrxl %ecx, %eax, %edx -; X86-BMI2-NEXT: shrdl %cl, %eax, %eax -; X86-BMI2-NEXT: testb $32, %cl -; X86-BMI2-NEXT: je .LBB16_2 -; X86-BMI2-NEXT: # %bb.1: -; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: shrxl %ebx, %eax, %esi ; X86-BMI2-NEXT: xorl %edx, %edx -; X86-BMI2-NEXT: .LBB16_2: -; X86-BMI2-NEXT: andl (%esi), %eax -; X86-BMI2-NEXT: andl 4(%esi), %edx +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: jne .LBB16_1 +; X86-BMI2-NEXT: # %bb.2: +; X86-BMI2-NEXT: movl %esi, %edx +; X86-BMI2-NEXT: jmp .LBB16_3 +; X86-BMI2-NEXT: .LBB16_1: +; X86-BMI2-NEXT: movl %esi, %eax +; X86-BMI2-NEXT: .LBB16_3: +; X86-BMI2-NEXT: andl (%ecx), %eax +; X86-BMI2-NEXT: andl 4(%ecx), %edx ; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %ebx ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: clear_highbits64_c3_load_indexzext: @@ -743,35 +775,43 @@ define i64 @clear_highbits64_c4_commutative(i64 %val, i64 %numhighbits) nounwind { ; 
X86-NOBMI2-LABEL: clear_highbits64_c4_commutative: ; X86-NOBMI2: # %bb.0: +; X86-NOBMI2-NEXT: pushl %esi ; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl $-1, %eax -; X86-NOBMI2-NEXT: movl $-1, %edx -; X86-NOBMI2-NEXT: shrl %cl, %edx -; X86-NOBMI2-NEXT: shrdl %cl, %eax, %eax -; X86-NOBMI2-NEXT: testb $32, %cl -; X86-NOBMI2-NEXT: je .LBB17_2 -; X86-NOBMI2-NEXT: # %bb.1: -; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: movl $-1, %esi +; X86-NOBMI2-NEXT: shrl %cl, %esi ; X86-NOBMI2-NEXT: xorl %edx, %edx -; X86-NOBMI2-NEXT: .LBB17_2: +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB17_1 +; X86-NOBMI2-NEXT: # %bb.2: +; X86-NOBMI2-NEXT: movl %esi, %edx +; X86-NOBMI2-NEXT: jmp .LBB17_3 +; X86-NOBMI2-NEXT: .LBB17_1: +; X86-NOBMI2-NEXT: movl %esi, %eax +; X86-NOBMI2-NEXT: .LBB17_3: ; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: popl %esi ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: clear_highbits64_c4_commutative: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl ; X86-BMI2-NEXT: movl $-1, %eax -; X86-BMI2-NEXT: shrxl %ecx, %eax, %edx -; X86-BMI2-NEXT: shrdl %cl, %eax, %eax -; X86-BMI2-NEXT: testb $32, %cl -; X86-BMI2-NEXT: je .LBB17_2 -; X86-BMI2-NEXT: # %bb.1: -; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: shrxl %ebx, %eax, %ecx ; X86-BMI2-NEXT: xorl %edx, %edx -; X86-BMI2-NEXT: .LBB17_2: +; X86-BMI2-NEXT: testb $32, %bl +; X86-BMI2-NEXT: jne .LBB17_1 +; X86-BMI2-NEXT: # %bb.2: +; X86-BMI2-NEXT: movl %ecx, %edx +; X86-BMI2-NEXT: jmp .LBB17_3 +; X86-BMI2-NEXT: .LBB17_1: +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: .LBB17_3: ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: popl %ebx ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: clear_highbits64_c4_commutative: @@ -882,7 +922,6 @@ ; X86-NOBMI2-NEXT: movl $-1, %esi ; X86-NOBMI2-NEXT: movl $-1, %edi ; X86-NOBMI2-NEXT: shrl %cl, %edi -; X86-NOBMI2-NEXT: shrdl %cl, %esi, %esi ; X86-NOBMI2-NEXT: testb $32, %cl ; X86-NOBMI2-NEXT: je .LBB19_2 ; X86-NOBMI2-NEXT: # %bb.1: @@ -908,25 +947,24 @@ ; X86-BMI2-NEXT: pushl %edi ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: pushl %eax -; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI2-NEXT: movl $-1, %esi -; X86-BMI2-NEXT: shrxl %ecx, %esi, %edi -; X86-BMI2-NEXT: shrdl %cl, %esi, %esi -; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movl $-1, %edi +; X86-BMI2-NEXT: shrxl %eax, %edi, %esi +; X86-BMI2-NEXT: testb $32, %al ; X86-BMI2-NEXT: je .LBB19_2 ; X86-BMI2-NEXT: # %bb.1: -; X86-BMI2-NEXT: movl %edi, %esi -; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: movl %esi, %edi +; X86-BMI2-NEXT: xorl %esi, %esi ; X86-BMI2-NEXT: .LBB19_2: ; X86-BMI2-NEXT: subl $8, %esp -; X86-BMI2-NEXT: pushl %edi ; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: pushl %edi ; X86-BMI2-NEXT: calll use64 ; X86-BMI2-NEXT: addl $16, %esp -; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi -; X86-BMI2-NEXT: movl %esi, %eax -; X86-BMI2-NEXT: movl %edi, %edx +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl %edi, %eax +; X86-BMI2-NEXT: movl %esi, %edx ; X86-BMI2-NEXT: addl $4, %esp ; X86-BMI2-NEXT: popl %esi ; X86-BMI2-NEXT: popl %edi diff --git a/llvm/test/CodeGen/X86/clear-lowbits.ll b/llvm/test/CodeGen/X86/clear-lowbits.ll --- a/llvm/test/CodeGen/X86/clear-lowbits.ll +++ 
b/llvm/test/CodeGen/X86/clear-lowbits.ll @@ -502,15 +502,14 @@ ; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: shll %cl, %eax -; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx ; X86-NOBMI2-NEXT: testb $32, %cl ; X86-NOBMI2-NEXT: je .LBB13_2 ; X86-NOBMI2-NEXT: # %bb.1: ; X86-NOBMI2-NEXT: movl %eax, %edx ; X86-NOBMI2-NEXT: xorl %eax, %eax ; X86-NOBMI2-NEXT: .LBB13_2: -; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: clear_lowbits64_c0: @@ -518,15 +517,14 @@ ; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl $-1, %edx ; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax -; X86-BMI2-NEXT: shldl %cl, %edx, %edx ; X86-BMI2-NEXT: testb $32, %cl ; X86-BMI2-NEXT: je .LBB13_2 ; X86-BMI2-NEXT: # %bb.1: ; X86-BMI2-NEXT: movl %eax, %edx ; X86-BMI2-NEXT: xorl %eax, %eax ; X86-BMI2-NEXT: .LBB13_2: -; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: clear_lowbits64_c0: @@ -555,15 +553,14 @@ ; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: shll %cl, %eax -; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx ; X86-NOBMI2-NEXT: testb $32, %cl ; X86-NOBMI2-NEXT: je .LBB14_2 ; X86-NOBMI2-NEXT: # %bb.1: ; X86-NOBMI2-NEXT: movl %eax, %edx ; X86-NOBMI2-NEXT: xorl %eax, %eax ; X86-NOBMI2-NEXT: .LBB14_2: -; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: clear_lowbits64_c1_indexzext: @@ -571,15 +568,14 @@ ; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl $-1, %edx ; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax -; X86-BMI2-NEXT: shldl %cl, %edx, %edx ; X86-BMI2-NEXT: testb $32, %cl ; X86-BMI2-NEXT: je .LBB14_2 ; X86-BMI2-NEXT: # %bb.1: ; X86-BMI2-NEXT: movl %eax, %edx ; X86-BMI2-NEXT: xorl %eax, %eax ; X86-BMI2-NEXT: .LBB14_2: -; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: clear_lowbits64_c1_indexzext: @@ -612,35 +608,33 @@ ; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: shll %cl, %eax -; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx ; X86-NOBMI2-NEXT: testb $32, %cl ; X86-NOBMI2-NEXT: je .LBB15_2 ; X86-NOBMI2-NEXT: # %bb.1: ; X86-NOBMI2-NEXT: movl %eax, %edx ; X86-NOBMI2-NEXT: xorl %eax, %eax ; X86-NOBMI2-NEXT: .LBB15_2: -; X86-NOBMI2-NEXT: andl 4(%esi), %edx ; X86-NOBMI2-NEXT: andl (%esi), %eax +; X86-NOBMI2-NEXT: andl 4(%esi), %edx ; X86-NOBMI2-NEXT: popl %esi ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: clear_lowbits64_c2_load: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl ; X86-BMI2-NEXT: movl $-1, %edx -; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax -; X86-BMI2-NEXT: shldl %cl, %edx, %edx -; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: shlxl %ebx, %edx, %eax +; X86-BMI2-NEXT: testb $32, %bl ; X86-BMI2-NEXT: je .LBB15_2 ; X86-BMI2-NEXT: # %bb.1: ; X86-BMI2-NEXT: movl %eax, %edx ; X86-BMI2-NEXT: xorl %eax, %eax ; X86-BMI2-NEXT: .LBB15_2: -; X86-BMI2-NEXT: andl 4(%esi), %edx -; 
X86-BMI2-NEXT: andl (%esi), %eax -; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: andl (%ecx), %eax +; X86-BMI2-NEXT: andl 4(%ecx), %edx +; X86-BMI2-NEXT: popl %ebx ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: clear_lowbits64_c2_load: @@ -672,35 +666,33 @@ ; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: shll %cl, %eax -; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx ; X86-NOBMI2-NEXT: testb $32, %cl ; X86-NOBMI2-NEXT: je .LBB16_2 ; X86-NOBMI2-NEXT: # %bb.1: ; X86-NOBMI2-NEXT: movl %eax, %edx ; X86-NOBMI2-NEXT: xorl %eax, %eax ; X86-NOBMI2-NEXT: .LBB16_2: -; X86-NOBMI2-NEXT: andl 4(%esi), %edx ; X86-NOBMI2-NEXT: andl (%esi), %eax +; X86-NOBMI2-NEXT: andl 4(%esi), %edx ; X86-NOBMI2-NEXT: popl %esi ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: clear_lowbits64_c3_load_indexzext: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl ; X86-BMI2-NEXT: movl $-1, %edx -; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax -; X86-BMI2-NEXT: shldl %cl, %edx, %edx -; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: shlxl %ebx, %edx, %eax +; X86-BMI2-NEXT: testb $32, %bl ; X86-BMI2-NEXT: je .LBB16_2 ; X86-BMI2-NEXT: # %bb.1: ; X86-BMI2-NEXT: movl %eax, %edx ; X86-BMI2-NEXT: xorl %eax, %eax ; X86-BMI2-NEXT: .LBB16_2: -; X86-BMI2-NEXT: andl 4(%esi), %edx -; X86-BMI2-NEXT: andl (%esi), %eax -; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: andl (%ecx), %eax +; X86-BMI2-NEXT: andl 4(%ecx), %edx +; X86-BMI2-NEXT: popl %ebx ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: clear_lowbits64_c3_load_indexzext: @@ -732,15 +724,14 @@ ; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: shll %cl, %eax -; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx ; X86-NOBMI2-NEXT: testb $32, %cl ; X86-NOBMI2-NEXT: je .LBB17_2 ; X86-NOBMI2-NEXT: # %bb.1: ; X86-NOBMI2-NEXT: movl %eax, %edx ; X86-NOBMI2-NEXT: xorl %eax, %eax ; X86-NOBMI2-NEXT: .LBB17_2: -; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: clear_lowbits64_c4_commutative: @@ -748,15 +739,14 @@ ; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl $-1, %edx ; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax -; X86-BMI2-NEXT: shldl %cl, %edx, %edx ; X86-BMI2-NEXT: testb $32, %cl ; X86-BMI2-NEXT: je .LBB17_2 ; X86-BMI2-NEXT: # %bb.1: ; X86-BMI2-NEXT: movl %eax, %edx ; X86-BMI2-NEXT: xorl %eax, %eax ; X86-BMI2-NEXT: .LBB17_2: -; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: clear_lowbits64_c4_commutative: @@ -1325,15 +1315,14 @@ ; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: shll %cl, %eax -; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx ; X86-NOBMI2-NEXT: testb $32, %cl ; X86-NOBMI2-NEXT: je .LBB31_2 ; X86-NOBMI2-NEXT: # %bb.1: ; X86-NOBMI2-NEXT: movl %eax, %edx ; X86-NOBMI2-NEXT: xorl %eax, %eax ; X86-NOBMI2-NEXT: .LBB31_2: -; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: clear_lowbits64_ic0: @@ -1342,15 +1331,14 @@ ; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl $-1, %edx ; X86-BMI2-NEXT: 
shlxl %ecx, %edx, %eax -; X86-BMI2-NEXT: shldl %cl, %edx, %edx ; X86-BMI2-NEXT: testb $32, %cl ; X86-BMI2-NEXT: je .LBB31_2 ; X86-BMI2-NEXT: # %bb.1: ; X86-BMI2-NEXT: movl %eax, %edx ; X86-BMI2-NEXT: xorl %eax, %eax ; X86-BMI2-NEXT: .LBB31_2: -; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: clear_lowbits64_ic0: @@ -1383,15 +1371,14 @@ ; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: shll %cl, %eax -; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx ; X86-NOBMI2-NEXT: testb $32, %cl ; X86-NOBMI2-NEXT: je .LBB32_2 ; X86-NOBMI2-NEXT: # %bb.1: ; X86-NOBMI2-NEXT: movl %eax, %edx ; X86-NOBMI2-NEXT: xorl %eax, %eax ; X86-NOBMI2-NEXT: .LBB32_2: -; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: clear_lowbits64_ic1_indexzext: @@ -1400,15 +1387,14 @@ ; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl $-1, %edx ; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax -; X86-BMI2-NEXT: shldl %cl, %edx, %edx ; X86-BMI2-NEXT: testb $32, %cl ; X86-BMI2-NEXT: je .LBB32_2 ; X86-BMI2-NEXT: # %bb.1: ; X86-BMI2-NEXT: movl %eax, %edx ; X86-BMI2-NEXT: xorl %eax, %eax ; X86-BMI2-NEXT: .LBB32_2: -; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: clear_lowbits64_ic1_indexzext: @@ -1445,36 +1431,34 @@ ; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: shll %cl, %eax -; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx ; X86-NOBMI2-NEXT: testb $32, %cl ; X86-NOBMI2-NEXT: je .LBB33_2 ; X86-NOBMI2-NEXT: # %bb.1: ; X86-NOBMI2-NEXT: movl %eax, %edx ; X86-NOBMI2-NEXT: xorl %eax, %eax ; X86-NOBMI2-NEXT: .LBB33_2: -; X86-NOBMI2-NEXT: andl 4(%esi), %edx ; X86-NOBMI2-NEXT: andl (%esi), %eax +; X86-NOBMI2-NEXT: andl 4(%esi), %edx ; X86-NOBMI2-NEXT: popl %esi ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: clear_lowbits64_ic2_load: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI2-NEXT: movb $64, %cl -; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb $64, %bl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %bl ; X86-BMI2-NEXT: movl $-1, %edx -; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax -; X86-BMI2-NEXT: shldl %cl, %edx, %edx -; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: shlxl %ebx, %edx, %eax +; X86-BMI2-NEXT: testb $32, %bl ; X86-BMI2-NEXT: je .LBB33_2 ; X86-BMI2-NEXT: # %bb.1: ; X86-BMI2-NEXT: movl %eax, %edx ; X86-BMI2-NEXT: xorl %eax, %eax ; X86-BMI2-NEXT: .LBB33_2: -; X86-BMI2-NEXT: andl 4(%esi), %edx -; X86-BMI2-NEXT: andl (%esi), %eax -; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: andl (%ecx), %eax +; X86-BMI2-NEXT: andl 4(%ecx), %edx +; X86-BMI2-NEXT: popl %ebx ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: clear_lowbits64_ic2_load: @@ -1510,36 +1494,34 @@ ; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: shll %cl, %eax -; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx ; X86-NOBMI2-NEXT: testb $32, %cl ; X86-NOBMI2-NEXT: je .LBB34_2 ; X86-NOBMI2-NEXT: # %bb.1: ; X86-NOBMI2-NEXT: movl %eax, %edx ; X86-NOBMI2-NEXT: xorl %eax, %eax ; X86-NOBMI2-NEXT: .LBB34_2: -; X86-NOBMI2-NEXT: andl 4(%esi), %edx ; X86-NOBMI2-NEXT: 
andl (%esi), %eax +; X86-NOBMI2-NEXT: andl 4(%esi), %edx ; X86-NOBMI2-NEXT: popl %esi ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: clear_lowbits64_ic3_load_indexzext: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI2-NEXT: movb $64, %cl -; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb $64, %bl +; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %bl ; X86-BMI2-NEXT: movl $-1, %edx -; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax -; X86-BMI2-NEXT: shldl %cl, %edx, %edx -; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: shlxl %ebx, %edx, %eax +; X86-BMI2-NEXT: testb $32, %bl ; X86-BMI2-NEXT: je .LBB34_2 ; X86-BMI2-NEXT: # %bb.1: ; X86-BMI2-NEXT: movl %eax, %edx ; X86-BMI2-NEXT: xorl %eax, %eax ; X86-BMI2-NEXT: .LBB34_2: -; X86-BMI2-NEXT: andl 4(%esi), %edx -; X86-BMI2-NEXT: andl (%esi), %eax -; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: andl (%ecx), %eax +; X86-BMI2-NEXT: andl 4(%ecx), %edx +; X86-BMI2-NEXT: popl %ebx ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: clear_lowbits64_ic3_load_indexzext: @@ -1575,15 +1557,14 @@ ; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: shll %cl, %eax -; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx ; X86-NOBMI2-NEXT: testb $32, %cl ; X86-NOBMI2-NEXT: je .LBB35_2 ; X86-NOBMI2-NEXT: # %bb.1: ; X86-NOBMI2-NEXT: movl %eax, %edx ; X86-NOBMI2-NEXT: xorl %eax, %eax ; X86-NOBMI2-NEXT: .LBB35_2: -; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: clear_lowbits64_ic4_commutative: @@ -1592,15 +1573,14 @@ ; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl $-1, %edx ; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax -; X86-BMI2-NEXT: shldl %cl, %edx, %edx ; X86-BMI2-NEXT: testb $32, %cl ; X86-BMI2-NEXT: je .LBB35_2 ; X86-BMI2-NEXT: # %bb.1: ; X86-BMI2-NEXT: movl %eax, %edx ; X86-BMI2-NEXT: xorl %eax, %eax ; X86-BMI2-NEXT: .LBB35_2: -; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: clear_lowbits64_ic4_commutative: @@ -1712,22 +1692,24 @@ ; X86-NOBMI2-NEXT: pushl %eax ; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl $-1, %esi -; X86-NOBMI2-NEXT: movl $-1, %edi -; X86-NOBMI2-NEXT: shll %cl, %edi -; X86-NOBMI2-NEXT: shldl %cl, %esi, %esi -; X86-NOBMI2-NEXT: testb $32, %cl -; X86-NOBMI2-NEXT: je .LBB37_2 -; X86-NOBMI2-NEXT: # %bb.1: -; X86-NOBMI2-NEXT: movl %edi, %esi +; X86-NOBMI2-NEXT: movl $-1, %eax +; X86-NOBMI2-NEXT: shll %cl, %eax ; X86-NOBMI2-NEXT: xorl %edi, %edi -; X86-NOBMI2-NEXT: .LBB37_2: +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB37_1 +; X86-NOBMI2-NEXT: # %bb.2: +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: jmp .LBB37_3 +; X86-NOBMI2-NEXT: .LBB37_1: +; X86-NOBMI2-NEXT: movl %eax, %esi +; X86-NOBMI2-NEXT: .LBB37_3: ; X86-NOBMI2-NEXT: subl $8, %esp ; X86-NOBMI2-NEXT: pushl %esi ; X86-NOBMI2-NEXT: pushl %edi ; X86-NOBMI2-NEXT: calll use64 ; X86-NOBMI2-NEXT: addl $16, %esp -; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %esi ; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %esi ; X86-NOBMI2-NEXT: movl %edi, %eax ; X86-NOBMI2-NEXT: movl %esi, %edx ; X86-NOBMI2-NEXT: addl $4, %esp @@ -1742,21 +1724,23 @@ ; X86-BMI2-NEXT: pushl %eax ; X86-BMI2-NEXT: movb 
{{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl $-1, %esi -; X86-BMI2-NEXT: shlxl %ecx, %esi, %edi -; X86-BMI2-NEXT: shldl %cl, %esi, %esi -; X86-BMI2-NEXT: testb $32, %cl -; X86-BMI2-NEXT: je .LBB37_2 -; X86-BMI2-NEXT: # %bb.1: -; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: shlxl %ecx, %esi, %eax ; X86-BMI2-NEXT: xorl %edi, %edi -; X86-BMI2-NEXT: .LBB37_2: +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: jne .LBB37_1 +; X86-BMI2-NEXT: # %bb.2: +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: jmp .LBB37_3 +; X86-BMI2-NEXT: .LBB37_1: +; X86-BMI2-NEXT: movl %eax, %esi +; X86-BMI2-NEXT: .LBB37_3: ; X86-BMI2-NEXT: subl $8, %esp ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: pushl %edi ; X86-BMI2-NEXT: calll use64 ; X86-BMI2-NEXT: addl $16, %esp -; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi ; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi ; X86-BMI2-NEXT: movl %edi, %eax ; X86-BMI2-NEXT: movl %esi, %edx ; X86-BMI2-NEXT: addl $4, %esp diff --git a/llvm/test/CodeGen/X86/extract-bits.ll b/llvm/test/CodeGen/X86/extract-bits.ll --- a/llvm/test/CodeGen/X86/extract-bits.ll +++ b/llvm/test/CodeGen/X86/extract-bits.ll @@ -2661,6 +2661,7 @@ define i64 @bextr64_b0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bextr64_b0: ; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %ch @@ -2670,6 +2671,7 @@ ; X86-NOBMI-NEXT: movl %eax, %edi ; X86-NOBMI-NEXT: shrl %cl, %edi ; X86-NOBMI-NEXT: shrdl %cl, %eax, %esi +; X86-NOBMI-NEXT: xorl %eax, %eax ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB25_2 ; X86-NOBMI-NEXT: # %bb.1: @@ -2677,22 +2679,24 @@ ; X86-NOBMI-NEXT: xorl %edi, %edi ; X86-NOBMI-NEXT: .LBB25_2: ; X86-NOBMI-NEXT: movl $-1, %edx -; X86-NOBMI-NEXT: movl $-1, %eax +; X86-NOBMI-NEXT: movl $-1, %ebx ; X86-NOBMI-NEXT: movb %ch, %cl -; X86-NOBMI-NEXT: shll %cl, %eax -; X86-NOBMI-NEXT: shldl %cl, %edx, %edx +; X86-NOBMI-NEXT: shll %cl, %ebx ; X86-NOBMI-NEXT: testb $32, %ch -; X86-NOBMI-NEXT: je .LBB25_4 -; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %eax, %edx -; X86-NOBMI-NEXT: xorl %eax, %eax -; X86-NOBMI-NEXT: .LBB25_4: +; X86-NOBMI-NEXT: jne .LBB25_3 +; X86-NOBMI-NEXT: # %bb.4: +; X86-NOBMI-NEXT: movl %ebx, %eax +; X86-NOBMI-NEXT: jmp .LBB25_5 +; X86-NOBMI-NEXT: .LBB25_3: +; X86-NOBMI-NEXT: movl %ebx, %edx +; X86-NOBMI-NEXT: .LBB25_5: ; X86-NOBMI-NEXT: notl %edx ; X86-NOBMI-NEXT: andl %edi, %edx ; X86-NOBMI-NEXT: notl %eax ; X86-NOBMI-NEXT: andl %esi, %eax ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi +; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bextr64_b0: @@ -2717,7 +2721,6 @@ ; X86-BMI1NOTBM-NEXT: movl $-1, %ebx ; X86-BMI1NOTBM-NEXT: movl %eax, %ecx ; X86-BMI1NOTBM-NEXT: shll %cl, %ebx -; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %edi ; X86-BMI1NOTBM-NEXT: testb $32, %al ; X86-BMI1NOTBM-NEXT: je .LBB25_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: @@ -2734,34 +2737,30 @@ ; X86-BMI1BMI2-LABEL: bextr64_b0: ; X86-BMI1BMI2: # %bb.0: ; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi ; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl ; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %esi +; X86-BMI1BMI2-NEXT: shrdl %cl, 
%edx, %eax ; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %edx ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB25_2 ; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edx, %esi +; X86-BMI1BMI2-NEXT: movl %edx, %eax ; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: .LBB25_2: -; X86-BMI1BMI2-NEXT: movl $-1, %edi -; X86-BMI1BMI2-NEXT: shlxl %eax, %edi, %ebx -; X86-BMI1BMI2-NEXT: movl %eax, %ecx -; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %edi -; X86-BMI1BMI2-NEXT: testb $32, %al +; X86-BMI1BMI2-NEXT: movl $-1, %esi +; X86-BMI1BMI2-NEXT: shlxl %ebx, %esi, %ecx +; X86-BMI1BMI2-NEXT: testb $32, %bl ; X86-BMI1BMI2-NEXT: je .LBB25_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebx, %edi -; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI1BMI2-NEXT: movl %ecx, %esi +; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx ; X86-BMI1BMI2-NEXT: .LBB25_4: -; X86-BMI1BMI2-NEXT: andnl %edx, %edi, %edx -; X86-BMI1BMI2-NEXT: andnl %esi, %ebx, %eax +; X86-BMI1BMI2-NEXT: andnl %edx, %esi, %edx +; X86-BMI1BMI2-NEXT: andnl %eax, %ecx, %eax ; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi ; X86-BMI1BMI2-NEXT: popl %ebx ; X86-BMI1BMI2-NEXT: retl ; @@ -2800,6 +2799,7 @@ define i64 @bextr64_b1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI-LABEL: bextr64_b1_indexzext: ; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %ch @@ -2809,6 +2809,7 @@ ; X86-NOBMI-NEXT: movl %eax, %edi ; X86-NOBMI-NEXT: shrl %cl, %edi ; X86-NOBMI-NEXT: shrdl %cl, %eax, %esi +; X86-NOBMI-NEXT: xorl %eax, %eax ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB26_2 ; X86-NOBMI-NEXT: # %bb.1: @@ -2816,22 +2817,24 @@ ; X86-NOBMI-NEXT: xorl %edi, %edi ; X86-NOBMI-NEXT: .LBB26_2: ; X86-NOBMI-NEXT: movl $-1, %edx -; X86-NOBMI-NEXT: movl $-1, %eax +; X86-NOBMI-NEXT: movl $-1, %ebx ; X86-NOBMI-NEXT: movb %ch, %cl -; X86-NOBMI-NEXT: shll %cl, %eax -; X86-NOBMI-NEXT: shldl %cl, %edx, %edx +; X86-NOBMI-NEXT: shll %cl, %ebx ; X86-NOBMI-NEXT: testb $32, %ch -; X86-NOBMI-NEXT: je .LBB26_4 -; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %eax, %edx -; X86-NOBMI-NEXT: xorl %eax, %eax -; X86-NOBMI-NEXT: .LBB26_4: +; X86-NOBMI-NEXT: jne .LBB26_3 +; X86-NOBMI-NEXT: # %bb.4: +; X86-NOBMI-NEXT: movl %ebx, %eax +; X86-NOBMI-NEXT: jmp .LBB26_5 +; X86-NOBMI-NEXT: .LBB26_3: +; X86-NOBMI-NEXT: movl %ebx, %edx +; X86-NOBMI-NEXT: .LBB26_5: ; X86-NOBMI-NEXT: notl %edx ; X86-NOBMI-NEXT: andl %edi, %edx ; X86-NOBMI-NEXT: notl %eax ; X86-NOBMI-NEXT: andl %esi, %eax ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi +; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bextr64_b1_indexzext: @@ -2856,7 +2859,6 @@ ; X86-BMI1NOTBM-NEXT: movl $-1, %ebx ; X86-BMI1NOTBM-NEXT: movl %eax, %ecx ; X86-BMI1NOTBM-NEXT: shll %cl, %ebx -; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %edi ; X86-BMI1NOTBM-NEXT: testb $32, %al ; X86-BMI1NOTBM-NEXT: je .LBB26_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: @@ -2873,34 +2875,30 @@ ; X86-BMI1BMI2-LABEL: bextr64_b1_indexzext: ; X86-BMI1BMI2: # %bb.0: ; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi ; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl ; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: shrdl 
%cl, %edx, %esi +; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax ; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %edx ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB26_2 ; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edx, %esi +; X86-BMI1BMI2-NEXT: movl %edx, %eax ; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: .LBB26_2: -; X86-BMI1BMI2-NEXT: movl $-1, %edi -; X86-BMI1BMI2-NEXT: shlxl %eax, %edi, %ebx -; X86-BMI1BMI2-NEXT: movl %eax, %ecx -; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %edi -; X86-BMI1BMI2-NEXT: testb $32, %al +; X86-BMI1BMI2-NEXT: movl $-1, %esi +; X86-BMI1BMI2-NEXT: shlxl %ebx, %esi, %ecx +; X86-BMI1BMI2-NEXT: testb $32, %bl ; X86-BMI1BMI2-NEXT: je .LBB26_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebx, %edi -; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI1BMI2-NEXT: movl %ecx, %esi +; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx ; X86-BMI1BMI2-NEXT: .LBB26_4: -; X86-BMI1BMI2-NEXT: andnl %edx, %edi, %edx -; X86-BMI1BMI2-NEXT: andnl %esi, %ebx, %eax +; X86-BMI1BMI2-NEXT: andnl %edx, %esi, %edx +; X86-BMI1BMI2-NEXT: andnl %eax, %ecx, %eax ; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi ; X86-BMI1BMI2-NEXT: popl %ebx ; X86-BMI1BMI2-NEXT: retl ; @@ -2943,6 +2941,7 @@ define i64 @bextr64_b2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bextr64_b2_load: ; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %ch @@ -2953,6 +2952,7 @@ ; X86-NOBMI-NEXT: movl %eax, %edi ; X86-NOBMI-NEXT: shrl %cl, %edi ; X86-NOBMI-NEXT: shrdl %cl, %eax, %esi +; X86-NOBMI-NEXT: xorl %eax, %eax ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB27_2 ; X86-NOBMI-NEXT: # %bb.1: @@ -2960,22 +2960,24 @@ ; X86-NOBMI-NEXT: xorl %edi, %edi ; X86-NOBMI-NEXT: .LBB27_2: ; X86-NOBMI-NEXT: movl $-1, %edx -; X86-NOBMI-NEXT: movl $-1, %eax +; X86-NOBMI-NEXT: movl $-1, %ebx ; X86-NOBMI-NEXT: movb %ch, %cl -; X86-NOBMI-NEXT: shll %cl, %eax -; X86-NOBMI-NEXT: shldl %cl, %edx, %edx +; X86-NOBMI-NEXT: shll %cl, %ebx ; X86-NOBMI-NEXT: testb $32, %ch -; X86-NOBMI-NEXT: je .LBB27_4 -; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %eax, %edx -; X86-NOBMI-NEXT: xorl %eax, %eax -; X86-NOBMI-NEXT: .LBB27_4: +; X86-NOBMI-NEXT: jne .LBB27_3 +; X86-NOBMI-NEXT: # %bb.4: +; X86-NOBMI-NEXT: movl %ebx, %eax +; X86-NOBMI-NEXT: jmp .LBB27_5 +; X86-NOBMI-NEXT: .LBB27_3: +; X86-NOBMI-NEXT: movl %ebx, %edx +; X86-NOBMI-NEXT: .LBB27_5: ; X86-NOBMI-NEXT: notl %edx ; X86-NOBMI-NEXT: andl %edi, %edx ; X86-NOBMI-NEXT: notl %eax ; X86-NOBMI-NEXT: andl %esi, %eax ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi +; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bextr64_b2_load: @@ -3001,7 +3003,6 @@ ; X86-BMI1NOTBM-NEXT: movl $-1, %ebx ; X86-BMI1NOTBM-NEXT: movl %eax, %ecx ; X86-BMI1NOTBM-NEXT: shll %cl, %ebx -; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %edi ; X86-BMI1NOTBM-NEXT: testb $32, %al ; X86-BMI1NOTBM-NEXT: je .LBB27_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: @@ -3018,35 +3019,31 @@ ; X86-BMI1BMI2-LABEL: bextr64_b2_load: ; X86-BMI1BMI2: # %bb.0: ; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi ; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl ; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl (%edx), %esi -; X86-BMI1BMI2-NEXT: movl 4(%edx), %edi -; X86-BMI1BMI2-NEXT: shrxl %ecx, 
%edi, %edx -; X86-BMI1BMI2-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1BMI2-NEXT: movl (%edx), %eax +; X86-BMI1BMI2-NEXT: movl 4(%edx), %esi +; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edx +; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %eax ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB27_2 ; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edx, %esi +; X86-BMI1BMI2-NEXT: movl %edx, %eax ; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: .LBB27_2: -; X86-BMI1BMI2-NEXT: movl $-1, %edi -; X86-BMI1BMI2-NEXT: shlxl %eax, %edi, %ebx -; X86-BMI1BMI2-NEXT: movl %eax, %ecx -; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %edi -; X86-BMI1BMI2-NEXT: testb $32, %al +; X86-BMI1BMI2-NEXT: movl $-1, %esi +; X86-BMI1BMI2-NEXT: shlxl %ebx, %esi, %ecx +; X86-BMI1BMI2-NEXT: testb $32, %bl ; X86-BMI1BMI2-NEXT: je .LBB27_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebx, %edi -; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI1BMI2-NEXT: movl %ecx, %esi +; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx ; X86-BMI1BMI2-NEXT: .LBB27_4: -; X86-BMI1BMI2-NEXT: andnl %edx, %edi, %edx -; X86-BMI1BMI2-NEXT: andnl %esi, %ebx, %eax +; X86-BMI1BMI2-NEXT: andnl %edx, %esi, %edx +; X86-BMI1BMI2-NEXT: andnl %eax, %ecx, %eax ; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi ; X86-BMI1BMI2-NEXT: popl %ebx ; X86-BMI1BMI2-NEXT: retl ; @@ -3087,6 +3084,7 @@ define i64 @bextr64_b3_load_indexzext(i64* %w, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI-LABEL: bextr64_b3_load_indexzext: ; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %ch @@ -3097,6 +3095,7 @@ ; X86-NOBMI-NEXT: movl %eax, %edi ; X86-NOBMI-NEXT: shrl %cl, %edi ; X86-NOBMI-NEXT: shrdl %cl, %eax, %esi +; X86-NOBMI-NEXT: xorl %eax, %eax ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB28_2 ; X86-NOBMI-NEXT: # %bb.1: @@ -3104,22 +3103,24 @@ ; X86-NOBMI-NEXT: xorl %edi, %edi ; X86-NOBMI-NEXT: .LBB28_2: ; X86-NOBMI-NEXT: movl $-1, %edx -; X86-NOBMI-NEXT: movl $-1, %eax +; X86-NOBMI-NEXT: movl $-1, %ebx ; X86-NOBMI-NEXT: movb %ch, %cl -; X86-NOBMI-NEXT: shll %cl, %eax -; X86-NOBMI-NEXT: shldl %cl, %edx, %edx +; X86-NOBMI-NEXT: shll %cl, %ebx ; X86-NOBMI-NEXT: testb $32, %ch -; X86-NOBMI-NEXT: je .LBB28_4 -; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %eax, %edx -; X86-NOBMI-NEXT: xorl %eax, %eax -; X86-NOBMI-NEXT: .LBB28_4: +; X86-NOBMI-NEXT: jne .LBB28_3 +; X86-NOBMI-NEXT: # %bb.4: +; X86-NOBMI-NEXT: movl %ebx, %eax +; X86-NOBMI-NEXT: jmp .LBB28_5 +; X86-NOBMI-NEXT: .LBB28_3: +; X86-NOBMI-NEXT: movl %ebx, %edx +; X86-NOBMI-NEXT: .LBB28_5: ; X86-NOBMI-NEXT: notl %edx ; X86-NOBMI-NEXT: andl %edi, %edx ; X86-NOBMI-NEXT: notl %eax ; X86-NOBMI-NEXT: andl %esi, %eax ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi +; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bextr64_b3_load_indexzext: @@ -3145,7 +3146,6 @@ ; X86-BMI1NOTBM-NEXT: movl $-1, %ebx ; X86-BMI1NOTBM-NEXT: movl %eax, %ecx ; X86-BMI1NOTBM-NEXT: shll %cl, %ebx -; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %edi ; X86-BMI1NOTBM-NEXT: testb $32, %al ; X86-BMI1NOTBM-NEXT: je .LBB28_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: @@ -3162,35 +3162,31 @@ ; X86-BMI1BMI2-LABEL: bextr64_b3_load_indexzext: ; X86-BMI1BMI2: # %bb.0: ; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi ; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl ; X86-BMI1BMI2-NEXT: movb 
{{[0-9]+}}(%esp), %cl ; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movl (%edx), %esi -; X86-BMI1BMI2-NEXT: movl 4(%edx), %edi -; X86-BMI1BMI2-NEXT: shrxl %ecx, %edi, %edx -; X86-BMI1BMI2-NEXT: shrdl %cl, %edi, %esi +; X86-BMI1BMI2-NEXT: movl (%edx), %eax +; X86-BMI1BMI2-NEXT: movl 4(%edx), %esi +; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edx +; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %eax ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB28_2 ; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edx, %esi +; X86-BMI1BMI2-NEXT: movl %edx, %eax ; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: .LBB28_2: -; X86-BMI1BMI2-NEXT: movl $-1, %edi -; X86-BMI1BMI2-NEXT: shlxl %eax, %edi, %ebx -; X86-BMI1BMI2-NEXT: movl %eax, %ecx -; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %edi -; X86-BMI1BMI2-NEXT: testb $32, %al +; X86-BMI1BMI2-NEXT: movl $-1, %esi +; X86-BMI1BMI2-NEXT: shlxl %ebx, %esi, %ecx +; X86-BMI1BMI2-NEXT: testb $32, %bl ; X86-BMI1BMI2-NEXT: je .LBB28_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebx, %edi -; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI1BMI2-NEXT: movl %ecx, %esi +; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx ; X86-BMI1BMI2-NEXT: .LBB28_4: -; X86-BMI1BMI2-NEXT: andnl %edx, %edi, %edx -; X86-BMI1BMI2-NEXT: andnl %esi, %ebx, %eax +; X86-BMI1BMI2-NEXT: andnl %edx, %esi, %edx +; X86-BMI1BMI2-NEXT: andnl %eax, %ecx, %eax ; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi ; X86-BMI1BMI2-NEXT: popl %ebx ; X86-BMI1BMI2-NEXT: retl ; @@ -3235,6 +3231,7 @@ define i64 @bextr64_b4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bextr64_b4_commutative: ; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %ch @@ -3244,6 +3241,7 @@ ; X86-NOBMI-NEXT: movl %esi, %edx ; X86-NOBMI-NEXT: shrl %cl, %edx ; X86-NOBMI-NEXT: shrdl %cl, %esi, %eax +; X86-NOBMI-NEXT: xorl %esi, %esi ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB29_2 ; X86-NOBMI-NEXT: # %bb.1: @@ -3251,22 +3249,24 @@ ; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: .LBB29_2: ; X86-NOBMI-NEXT: movl $-1, %edi -; X86-NOBMI-NEXT: movl $-1, %esi +; X86-NOBMI-NEXT: movl $-1, %ebx ; X86-NOBMI-NEXT: movb %ch, %cl -; X86-NOBMI-NEXT: shll %cl, %esi -; X86-NOBMI-NEXT: shldl %cl, %edi, %edi +; X86-NOBMI-NEXT: shll %cl, %ebx ; X86-NOBMI-NEXT: testb $32, %ch -; X86-NOBMI-NEXT: je .LBB29_4 -; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %esi, %edi -; X86-NOBMI-NEXT: xorl %esi, %esi -; X86-NOBMI-NEXT: .LBB29_4: +; X86-NOBMI-NEXT: jne .LBB29_3 +; X86-NOBMI-NEXT: # %bb.4: +; X86-NOBMI-NEXT: movl %ebx, %esi +; X86-NOBMI-NEXT: jmp .LBB29_5 +; X86-NOBMI-NEXT: .LBB29_3: +; X86-NOBMI-NEXT: movl %ebx, %edi +; X86-NOBMI-NEXT: .LBB29_5: ; X86-NOBMI-NEXT: notl %edi ; X86-NOBMI-NEXT: andl %edi, %edx ; X86-NOBMI-NEXT: notl %esi ; X86-NOBMI-NEXT: andl %esi, %eax ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi +; X86-NOBMI-NEXT: popl %ebx ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bextr64_b4_commutative: @@ -3291,7 +3291,6 @@ ; X86-BMI1NOTBM-NEXT: movl $-1, %ebx ; X86-BMI1NOTBM-NEXT: movl %eax, %ecx ; X86-BMI1NOTBM-NEXT: shll %cl, %ebx -; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %edi ; X86-BMI1NOTBM-NEXT: testb $32, %al ; X86-BMI1NOTBM-NEXT: je .LBB29_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: @@ -3308,34 +3307,30 @@ ; X86-BMI1BMI2-LABEL: bextr64_b4_commutative: ; X86-BMI1BMI2: # %bb.0: ; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %edi ; 
X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl ; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %esi +; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax ; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %edx ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB29_2 ; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edx, %esi +; X86-BMI1BMI2-NEXT: movl %edx, %eax ; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: .LBB29_2: -; X86-BMI1BMI2-NEXT: movl $-1, %edi -; X86-BMI1BMI2-NEXT: shlxl %eax, %edi, %ebx -; X86-BMI1BMI2-NEXT: movl %eax, %ecx -; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %edi -; X86-BMI1BMI2-NEXT: testb $32, %al +; X86-BMI1BMI2-NEXT: movl $-1, %esi +; X86-BMI1BMI2-NEXT: shlxl %ebx, %esi, %ecx +; X86-BMI1BMI2-NEXT: testb $32, %bl ; X86-BMI1BMI2-NEXT: je .LBB29_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebx, %edi -; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI1BMI2-NEXT: movl %ecx, %esi +; X86-BMI1BMI2-NEXT: xorl %ecx, %ecx ; X86-BMI1BMI2-NEXT: .LBB29_4: -; X86-BMI1BMI2-NEXT: andnl %edx, %edi, %edx -; X86-BMI1BMI2-NEXT: andnl %esi, %ebx, %eax +; X86-BMI1BMI2-NEXT: andnl %edx, %esi, %edx +; X86-BMI1BMI2-NEXT: andnl %eax, %ecx, %eax ; X86-BMI1BMI2-NEXT: popl %esi -; X86-BMI1BMI2-NEXT: popl %edi ; X86-BMI1BMI2-NEXT: popl %ebx ; X86-BMI1BMI2-NEXT: retl ; @@ -3379,42 +3374,44 @@ ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: subl $12, %esp -; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl %esi, %ebp -; X86-NOBMI-NEXT: movl %eax, %ecx +; X86-NOBMI-NEXT: movb %al, %cl ; X86-NOBMI-NEXT: shrl %cl, %ebp -; X86-NOBMI-NEXT: shrdl %cl, %esi, %ebx +; X86-NOBMI-NEXT: shrdl %cl, %esi, %edx +; X86-NOBMI-NEXT: xorl %ebx, %ebx ; X86-NOBMI-NEXT: testb $32, %al ; X86-NOBMI-NEXT: je .LBB30_2 ; X86-NOBMI-NEXT: # %bb.1: -; X86-NOBMI-NEXT: movl %ebp, %ebx +; X86-NOBMI-NEXT: movl %ebp, %edx ; X86-NOBMI-NEXT: xorl %ebp, %ebp ; X86-NOBMI-NEXT: .LBB30_2: -; X86-NOBMI-NEXT: movl $-1, %esi ; X86-NOBMI-NEXT: movl $-1, %edi -; X86-NOBMI-NEXT: movl %edx, %ecx -; X86-NOBMI-NEXT: shll %cl, %edi -; X86-NOBMI-NEXT: shldl %cl, %esi, %esi -; X86-NOBMI-NEXT: testb $32, %dl -; X86-NOBMI-NEXT: je .LBB30_4 -; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %edi, %esi -; X86-NOBMI-NEXT: xorl %edi, %edi -; X86-NOBMI-NEXT: .LBB30_4: -; X86-NOBMI-NEXT: notl %esi -; X86-NOBMI-NEXT: andl %ebp, %esi +; X86-NOBMI-NEXT: movl $-1, %esi +; X86-NOBMI-NEXT: movb %ch, %cl +; X86-NOBMI-NEXT: shll %cl, %esi +; X86-NOBMI-NEXT: testb $32, %ch +; X86-NOBMI-NEXT: jne .LBB30_3 +; X86-NOBMI-NEXT: # %bb.4: +; X86-NOBMI-NEXT: movl %esi, %ebx +; X86-NOBMI-NEXT: jmp .LBB30_5 +; X86-NOBMI-NEXT: .LBB30_3: +; X86-NOBMI-NEXT: movl %esi, %edi +; X86-NOBMI-NEXT: .LBB30_5: ; X86-NOBMI-NEXT: notl %edi -; X86-NOBMI-NEXT: andl %ebx, %edi +; X86-NOBMI-NEXT: andl %ebp, %edi +; X86-NOBMI-NEXT: notl %ebx +; X86-NOBMI-NEXT: andl %edx, %ebx ; X86-NOBMI-NEXT: subl $8, %esp ; X86-NOBMI-NEXT: pushl {{[0-9]+}}(%esp) ; X86-NOBMI-NEXT: pushl %eax ; X86-NOBMI-NEXT: calll use64 ; X86-NOBMI-NEXT: addl $16, %esp -; 
X86-NOBMI-NEXT: movl %edi, %eax -; X86-NOBMI-NEXT: movl %esi, %edx +; X86-NOBMI-NEXT: movl %ebx, %eax +; X86-NOBMI-NEXT: movl %edi, %edx ; X86-NOBMI-NEXT: addl $12, %esp ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi @@ -3447,7 +3444,6 @@ ; X86-BMI1NOTBM-NEXT: movl $-1, %ebp ; X86-BMI1NOTBM-NEXT: movl %edx, %ecx ; X86-BMI1NOTBM-NEXT: shll %cl, %ebp -; X86-BMI1NOTBM-NEXT: shldl %cl, %ebx, %ebx ; X86-BMI1NOTBM-NEXT: testb $32, %dl ; X86-BMI1NOTBM-NEXT: je .LBB30_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: @@ -3477,34 +3473,32 @@ ; X86-BMI1BMI2-NEXT: pushl %edi ; X86-BMI1BMI2-NEXT: pushl %esi ; X86-BMI1BMI2-NEXT: subl $12, %esp -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl ; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movl %eax, %ecx -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %edi -; X86-BMI1BMI2-NEXT: shrxl %eax, %esi, %esi -; X86-BMI1BMI2-NEXT: testb $32, %al +; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %edx +; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB30_2 ; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %esi, %edi -; X86-BMI1BMI2-NEXT: xorl %esi, %esi +; X86-BMI1BMI2-NEXT: movl %edx, %eax +; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: .LBB30_2: -; X86-BMI1BMI2-NEXT: movl $-1, %ebp -; X86-BMI1BMI2-NEXT: shlxl %edx, %ebp, %ebx -; X86-BMI1BMI2-NEXT: movl %edx, %ecx -; X86-BMI1BMI2-NEXT: shldl %cl, %ebp, %ebp -; X86-BMI1BMI2-NEXT: testb $32, %dl +; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-BMI1BMI2-NEXT: movl $-1, %esi +; X86-BMI1BMI2-NEXT: shlxl %ebx, %esi, %edi +; X86-BMI1BMI2-NEXT: testb $32, %bl ; X86-BMI1BMI2-NEXT: je .LBB30_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebx, %ebp -; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI1BMI2-NEXT: movl %edi, %esi +; X86-BMI1BMI2-NEXT: xorl %edi, %edi ; X86-BMI1BMI2-NEXT: .LBB30_4: -; X86-BMI1BMI2-NEXT: andnl %esi, %ebp, %esi -; X86-BMI1BMI2-NEXT: andnl %edi, %ebx, %edi +; X86-BMI1BMI2-NEXT: andnl %edx, %esi, %esi +; X86-BMI1BMI2-NEXT: andnl %eax, %edi, %edi ; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl {{[0-9]+}}(%esp) -; X86-BMI1BMI2-NEXT: pushl %eax +; X86-BMI1BMI2-NEXT: pushl %ebp +; X86-BMI1BMI2-NEXT: pushl %ecx ; X86-BMI1BMI2-NEXT: calll use64 ; X86-BMI1BMI2-NEXT: addl $16, %esp ; X86-BMI1BMI2-NEXT: movl %edi, %eax @@ -4888,7 +4882,6 @@ ; X86-NOBMI-NEXT: movl $-1, %ebp ; X86-NOBMI-NEXT: movl $-1, %ebx ; X86-NOBMI-NEXT: shrl %cl, %ebx -; X86-NOBMI-NEXT: shrdl %cl, %ebp, %ebp ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB41_4 ; X86-NOBMI-NEXT: # %bb.3: @@ -4935,7 +4928,6 @@ ; X86-BMI1NOTBM-NEXT: movl $-1, %ebp ; X86-BMI1NOTBM-NEXT: movl $-1, %ebx ; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebp, %ebp ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB41_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: @@ -4976,24 +4968,23 @@ ; X86-BMI1BMI2-NEXT: movl %edi, %esi ; X86-BMI1BMI2-NEXT: xorl %edi, %edi ; X86-BMI1BMI2-NEXT: .LBB41_2: -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $-1, %ebx -; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebx, %ebp -; X86-BMI1BMI2-NEXT: shrdl %cl, %ebx, %ebx -; X86-BMI1BMI2-NEXT: testb $32, %cl +; X86-BMI1BMI2-NEXT: movb $64, %al +; 
X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-BMI1BMI2-NEXT: movl $-1, %ebp +; X86-BMI1BMI2-NEXT: shrxl %eax, %ebp, %ebx +; X86-BMI1BMI2-NEXT: testb $32, %al ; X86-BMI1BMI2-NEXT: je .LBB41_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebp, %ebx -; X86-BMI1BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI1BMI2-NEXT: movl %ebx, %ebp +; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx ; X86-BMI1BMI2-NEXT: .LBB41_4: ; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %ebp ; X86-BMI1BMI2-NEXT: pushl %ebx +; X86-BMI1BMI2-NEXT: pushl %ebp ; X86-BMI1BMI2-NEXT: calll use64 ; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: andl %ebx, %esi -; X86-BMI1BMI2-NEXT: andl %ebp, %edi +; X86-BMI1BMI2-NEXT: andl %ebp, %esi +; X86-BMI1BMI2-NEXT: andl %ebx, %edi ; X86-BMI1BMI2-NEXT: movl %esi, %eax ; X86-BMI1BMI2-NEXT: movl %edi, %edx ; X86-BMI1BMI2-NEXT: addl $12, %esp @@ -5097,7 +5088,6 @@ ; X86-NOBMI-NEXT: movl $-1, %ebp ; X86-NOBMI-NEXT: movl $-1, %ebx ; X86-NOBMI-NEXT: shrl %cl, %ebx -; X86-NOBMI-NEXT: shrdl %cl, %ebp, %ebp ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB42_4 ; X86-NOBMI-NEXT: # %bb.3: @@ -5144,7 +5134,6 @@ ; X86-BMI1NOTBM-NEXT: movl $-1, %ebp ; X86-BMI1NOTBM-NEXT: movl $-1, %ebx ; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebp, %ebp ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB42_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: @@ -5185,24 +5174,23 @@ ; X86-BMI1BMI2-NEXT: movl %edi, %esi ; X86-BMI1BMI2-NEXT: xorl %edi, %edi ; X86-BMI1BMI2-NEXT: .LBB42_2: -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $-1, %ebx -; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebx, %ebp -; X86-BMI1BMI2-NEXT: shrdl %cl, %ebx, %ebx -; X86-BMI1BMI2-NEXT: testb $32, %cl +; X86-BMI1BMI2-NEXT: movb $64, %al +; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-BMI1BMI2-NEXT: movl $-1, %ebp +; X86-BMI1BMI2-NEXT: shrxl %eax, %ebp, %ebx +; X86-BMI1BMI2-NEXT: testb $32, %al ; X86-BMI1BMI2-NEXT: je .LBB42_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebp, %ebx -; X86-BMI1BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI1BMI2-NEXT: movl %ebx, %ebp +; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx ; X86-BMI1BMI2-NEXT: .LBB42_4: ; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %ebp ; X86-BMI1BMI2-NEXT: pushl %ebx +; X86-BMI1BMI2-NEXT: pushl %ebp ; X86-BMI1BMI2-NEXT: calll use64 ; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: andl %ebx, %esi -; X86-BMI1BMI2-NEXT: andl %ebp, %edi +; X86-BMI1BMI2-NEXT: andl %ebp, %esi +; X86-BMI1BMI2-NEXT: andl %ebx, %edi ; X86-BMI1BMI2-NEXT: movl %esi, %eax ; X86-BMI1BMI2-NEXT: movl %edi, %edx ; X86-BMI1BMI2-NEXT: addl $12, %esp @@ -5310,7 +5298,6 @@ ; X86-NOBMI-NEXT: movl $-1, %ebp ; X86-NOBMI-NEXT: movl $-1, %ebx ; X86-NOBMI-NEXT: shrl %cl, %ebx -; X86-NOBMI-NEXT: shrdl %cl, %ebp, %ebp ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB43_4 ; X86-NOBMI-NEXT: # %bb.3: @@ -5358,7 +5345,6 @@ ; X86-BMI1NOTBM-NEXT: movl $-1, %ebp ; X86-BMI1NOTBM-NEXT: movl $-1, %ebx ; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebp, %ebp ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB43_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: @@ -5400,24 +5386,23 @@ ; X86-BMI1BMI2-NEXT: movl %edi, %esi ; X86-BMI1BMI2-NEXT: xorl %edi, %edi ; X86-BMI1BMI2-NEXT: .LBB43_2: -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $-1, %ebx -; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebx, %ebp -; X86-BMI1BMI2-NEXT: shrdl 
%cl, %ebx, %ebx -; X86-BMI1BMI2-NEXT: testb $32, %cl +; X86-BMI1BMI2-NEXT: movb $64, %al +; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-BMI1BMI2-NEXT: movl $-1, %ebp +; X86-BMI1BMI2-NEXT: shrxl %eax, %ebp, %ebx +; X86-BMI1BMI2-NEXT: testb $32, %al ; X86-BMI1BMI2-NEXT: je .LBB43_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebp, %ebx -; X86-BMI1BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI1BMI2-NEXT: movl %ebx, %ebp +; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx ; X86-BMI1BMI2-NEXT: .LBB43_4: ; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %ebp ; X86-BMI1BMI2-NEXT: pushl %ebx +; X86-BMI1BMI2-NEXT: pushl %ebp ; X86-BMI1BMI2-NEXT: calll use64 ; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: andl %ebx, %esi -; X86-BMI1BMI2-NEXT: andl %ebp, %edi +; X86-BMI1BMI2-NEXT: andl %ebp, %esi +; X86-BMI1BMI2-NEXT: andl %ebx, %edi ; X86-BMI1BMI2-NEXT: movl %esi, %eax ; X86-BMI1BMI2-NEXT: movl %edi, %edx ; X86-BMI1BMI2-NEXT: addl $12, %esp @@ -5523,7 +5508,6 @@ ; X86-NOBMI-NEXT: movl $-1, %ebp ; X86-NOBMI-NEXT: movl $-1, %ebx ; X86-NOBMI-NEXT: shrl %cl, %ebx -; X86-NOBMI-NEXT: shrdl %cl, %ebp, %ebp ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB44_4 ; X86-NOBMI-NEXT: # %bb.3: @@ -5571,7 +5555,6 @@ ; X86-BMI1NOTBM-NEXT: movl $-1, %ebp ; X86-BMI1NOTBM-NEXT: movl $-1, %ebx ; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebp, %ebp ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB44_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: @@ -5613,24 +5596,23 @@ ; X86-BMI1BMI2-NEXT: movl %edi, %esi ; X86-BMI1BMI2-NEXT: xorl %edi, %edi ; X86-BMI1BMI2-NEXT: .LBB44_2: -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $-1, %ebx -; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebx, %ebp -; X86-BMI1BMI2-NEXT: shrdl %cl, %ebx, %ebx -; X86-BMI1BMI2-NEXT: testb $32, %cl +; X86-BMI1BMI2-NEXT: movb $64, %al +; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-BMI1BMI2-NEXT: movl $-1, %ebp +; X86-BMI1BMI2-NEXT: shrxl %eax, %ebp, %ebx +; X86-BMI1BMI2-NEXT: testb $32, %al ; X86-BMI1BMI2-NEXT: je .LBB44_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebp, %ebx -; X86-BMI1BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI1BMI2-NEXT: movl %ebx, %ebp +; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx ; X86-BMI1BMI2-NEXT: .LBB44_4: ; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %ebp ; X86-BMI1BMI2-NEXT: pushl %ebx +; X86-BMI1BMI2-NEXT: pushl %ebp ; X86-BMI1BMI2-NEXT: calll use64 ; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: andl %ebx, %esi -; X86-BMI1BMI2-NEXT: andl %ebp, %edi +; X86-BMI1BMI2-NEXT: andl %ebp, %esi +; X86-BMI1BMI2-NEXT: andl %ebx, %edi ; X86-BMI1BMI2-NEXT: movl %esi, %eax ; X86-BMI1BMI2-NEXT: movl %edi, %edx ; X86-BMI1BMI2-NEXT: addl $12, %esp @@ -5738,7 +5720,6 @@ ; X86-NOBMI-NEXT: movl $-1, %ebp ; X86-NOBMI-NEXT: movl $-1, %ebx ; X86-NOBMI-NEXT: shrl %cl, %ebx -; X86-NOBMI-NEXT: shrdl %cl, %ebp, %ebp ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB45_4 ; X86-NOBMI-NEXT: # %bb.3: @@ -5785,7 +5766,6 @@ ; X86-BMI1NOTBM-NEXT: movl $-1, %ebp ; X86-BMI1NOTBM-NEXT: movl $-1, %ebx ; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebp, %ebp ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB45_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: @@ -5826,24 +5806,23 @@ ; X86-BMI1BMI2-NEXT: movl %edi, %esi ; X86-BMI1BMI2-NEXT: xorl %edi, %edi ; X86-BMI1BMI2-NEXT: .LBB45_2: -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; 
X86-BMI1BMI2-NEXT: movl $-1, %ebx -; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebx, %ebp -; X86-BMI1BMI2-NEXT: shrdl %cl, %ebx, %ebx -; X86-BMI1BMI2-NEXT: testb $32, %cl +; X86-BMI1BMI2-NEXT: movb $64, %al +; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-BMI1BMI2-NEXT: movl $-1, %ebp +; X86-BMI1BMI2-NEXT: shrxl %eax, %ebp, %ebx +; X86-BMI1BMI2-NEXT: testb $32, %al ; X86-BMI1BMI2-NEXT: je .LBB45_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: movl %ebp, %ebx -; X86-BMI1BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI1BMI2-NEXT: movl %ebx, %ebp +; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx ; X86-BMI1BMI2-NEXT: .LBB45_4: ; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %ebp ; X86-BMI1BMI2-NEXT: pushl %ebx +; X86-BMI1BMI2-NEXT: pushl %ebp ; X86-BMI1BMI2-NEXT: calll use64 ; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: andl %ebx, %esi -; X86-BMI1BMI2-NEXT: andl %ebp, %edi +; X86-BMI1BMI2-NEXT: andl %ebp, %esi +; X86-BMI1BMI2-NEXT: andl %ebx, %edi ; X86-BMI1BMI2-NEXT: movl %esi, %eax ; X86-BMI1BMI2-NEXT: movl %edi, %edx ; X86-BMI1BMI2-NEXT: addl $12, %esp @@ -5947,7 +5926,6 @@ ; X86-NOBMI-NEXT: movl $-1, %ebx ; X86-NOBMI-NEXT: movl $-1, %ebp ; X86-NOBMI-NEXT: shrl %cl, %ebp -; X86-NOBMI-NEXT: shrdl %cl, %ebx, %ebx ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB46_4 ; X86-NOBMI-NEXT: # %bb.3: @@ -5999,7 +5977,6 @@ ; X86-BMI1NOTBM-NEXT: movl $-1, %ebx ; X86-BMI1NOTBM-NEXT: movl $-1, %ebp ; X86-BMI1NOTBM-NEXT: shrl %cl, %ebp -; X86-BMI1NOTBM-NEXT: shrdl %cl, %ebx, %ebx ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB46_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: @@ -6045,12 +6022,11 @@ ; X86-BMI1BMI2-NEXT: movl %edi, %esi ; X86-BMI1BMI2-NEXT: xorl %edi, %edi ; X86-BMI1BMI2-NEXT: .LBB46_2: -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-BMI1BMI2-NEXT: movb $64, %al +; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al ; X86-BMI1BMI2-NEXT: movl $-1, %ebp -; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebp, %ebx -; X86-BMI1BMI2-NEXT: shrdl %cl, %ebp, %ebp -; X86-BMI1BMI2-NEXT: testb $32, %cl +; X86-BMI1BMI2-NEXT: shrxl %eax, %ebp, %ebx +; X86-BMI1BMI2-NEXT: testb $32, %al ; X86-BMI1BMI2-NEXT: je .LBB46_4 ; X86-BMI1BMI2-NEXT: # %bb.3: ; X86-BMI1BMI2-NEXT: movl %ebx, %ebp @@ -6175,14 +6151,12 @@ ; X86-NOBMI-NEXT: .LBB47_2: ; X86-NOBMI-NEXT: movb $64, %cl ; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-NOBMI-NEXT: movl $-1, %esi ; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: shrl %cl, %eax -; X86-NOBMI-NEXT: shrdl %cl, %esi, %esi ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: jne .LBB47_4 ; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %esi, %eax +; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: .LBB47_4: ; X86-NOBMI-NEXT: andl %edx, %eax ; X86-NOBMI-NEXT: popl %esi @@ -6204,14 +6178,12 @@ ; X86-BMI1NOTBM-NEXT: .LBB47_2: ; X86-BMI1NOTBM-NEXT: movb $64, %cl ; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %esi ; X86-BMI1NOTBM-NEXT: movl $-1, %eax ; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %esi ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: jne .LBB47_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %esi, %eax +; X86-BMI1NOTBM-NEXT: movl $-1, %eax ; X86-BMI1NOTBM-NEXT: .LBB47_4: ; X86-BMI1NOTBM-NEXT: andl %edx, %eax ; X86-BMI1NOTBM-NEXT: popl %esi @@ -6219,7 +6191,6 @@ ; ; X86-BMI1BMI2-LABEL: bextr64_32_c0: ; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi ; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1BMI2-NEXT: movl 
{{[0-9]+}}(%esp), %edx ; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -6231,16 +6202,13 @@ ; X86-BMI1BMI2-NEXT: .LBB47_2: ; X86-BMI1BMI2-NEXT: movb $64, %cl ; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $-1, %esi ; X86-BMI1BMI2-NEXT: movl $-1, %eax -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %eax ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB47_4 ; X86-BMI1BMI2-NEXT: # %bb.3: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %eax +; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %eax ; X86-BMI1BMI2-NEXT: .LBB47_4: ; X86-BMI1BMI2-NEXT: andl %edx, %eax -; X86-BMI1BMI2-NEXT: popl %esi ; X86-BMI1BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bextr64_32_c0: diff --git a/llvm/test/CodeGen/X86/extract-lowbits.ll b/llvm/test/CodeGen/X86/extract-lowbits.ll --- a/llvm/test/CodeGen/X86/extract-lowbits.ll +++ b/llvm/test/CodeGen/X86/extract-lowbits.ll @@ -1356,58 +1356,56 @@ define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi64_b0: ; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $-1, %edx -; X86-NOBMI-NEXT: movl $-1, %eax -; X86-NOBMI-NEXT: shll %cl, %eax -; X86-NOBMI-NEXT: shldl %cl, %edx, %edx -; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: je .LBB20_2 -; X86-NOBMI-NEXT: # %bb.1: -; X86-NOBMI-NEXT: movl %eax, %edx +; X86-NOBMI-NEXT: movl $-1, %esi +; X86-NOBMI-NEXT: shll %cl, %esi ; X86-NOBMI-NEXT: xorl %eax, %eax -; X86-NOBMI-NEXT: .LBB20_2: +; X86-NOBMI-NEXT: testb $32, %cl +; X86-NOBMI-NEXT: jne .LBB20_1 +; X86-NOBMI-NEXT: # %bb.2: +; X86-NOBMI-NEXT: movl %esi, %eax +; X86-NOBMI-NEXT: jmp .LBB20_3 +; X86-NOBMI-NEXT: .LBB20_1: +; X86-NOBMI-NEXT: movl %esi, %edx +; X86-NOBMI-NEXT: .LBB20_3: ; X86-NOBMI-NEXT: notl %edx ; X86-NOBMI-NEXT: notl %eax -; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bzhi64_b0: ; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi ; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1NOTBM-NEXT: movl $-1, %edx ; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: shll %cl, %esi -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %eax +; X86-BMI1NOTBM-NEXT: shll %cl, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB20_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi +; X86-BMI1NOTBM-NEXT: movl %eax, %edx +; X86-BMI1NOTBM-NEXT: xorl %eax, %eax ; X86-BMI1NOTBM-NEXT: .LBB20_2: -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %edx -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %esi, %eax -; X86-BMI1NOTBM-NEXT: popl %esi +; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax +; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bzhi64_b0: ; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $-1, %eax -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %esi -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl +; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI1BMI2-NEXT: movl $-1, %ecx +; X86-BMI1BMI2-NEXT: shlxl %edx, %ecx, %eax +; X86-BMI1BMI2-NEXT: testb $32, %dl ; X86-BMI1BMI2-NEXT: je .LBB20_2 ; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: xorl %esi, 
%esi +; X86-BMI1BMI2-NEXT: movl %eax, %ecx +; X86-BMI1BMI2-NEXT: xorl %eax, %eax ; X86-BMI1BMI2-NEXT: .LBB20_2: -; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %edx -; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %esi, %eax -; X86-BMI1BMI2-NEXT: popl %esi +; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax +; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx ; X86-BMI1BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_b0: @@ -1439,58 +1437,56 @@ define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi64_b1_indexzext: ; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $-1, %edx -; X86-NOBMI-NEXT: movl $-1, %eax -; X86-NOBMI-NEXT: shll %cl, %eax -; X86-NOBMI-NEXT: shldl %cl, %edx, %edx -; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: je .LBB21_2 -; X86-NOBMI-NEXT: # %bb.1: -; X86-NOBMI-NEXT: movl %eax, %edx +; X86-NOBMI-NEXT: movl $-1, %esi +; X86-NOBMI-NEXT: shll %cl, %esi ; X86-NOBMI-NEXT: xorl %eax, %eax -; X86-NOBMI-NEXT: .LBB21_2: +; X86-NOBMI-NEXT: testb $32, %cl +; X86-NOBMI-NEXT: jne .LBB21_1 +; X86-NOBMI-NEXT: # %bb.2: +; X86-NOBMI-NEXT: movl %esi, %eax +; X86-NOBMI-NEXT: jmp .LBB21_3 +; X86-NOBMI-NEXT: .LBB21_1: +; X86-NOBMI-NEXT: movl %esi, %edx +; X86-NOBMI-NEXT: .LBB21_3: ; X86-NOBMI-NEXT: notl %edx ; X86-NOBMI-NEXT: notl %eax -; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bzhi64_b1_indexzext: ; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi ; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1NOTBM-NEXT: movl $-1, %edx ; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: shll %cl, %esi -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %eax +; X86-BMI1NOTBM-NEXT: shll %cl, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB21_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi +; X86-BMI1NOTBM-NEXT: movl %eax, %edx +; X86-BMI1NOTBM-NEXT: xorl %eax, %eax ; X86-BMI1NOTBM-NEXT: .LBB21_2: -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %edx -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %esi, %eax -; X86-BMI1NOTBM-NEXT: popl %esi +; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax +; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bzhi64_b1_indexzext: ; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $-1, %eax -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %esi -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl +; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI1BMI2-NEXT: movl $-1, %ecx +; X86-BMI1BMI2-NEXT: shlxl %edx, %ecx, %eax +; X86-BMI1BMI2-NEXT: testb $32, %dl ; X86-BMI1BMI2-NEXT: je .LBB21_2 ; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: xorl %esi, %esi +; X86-BMI1BMI2-NEXT: movl %eax, %ecx +; X86-BMI1BMI2-NEXT: xorl %eax, %eax ; X86-BMI1BMI2-NEXT: .LBB21_2: -; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %edx -; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %esi, %eax -; X86-BMI1BMI2-NEXT: popl %esi +; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax +; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx ; X86-BMI1BMI2-NEXT: 
retl ; ; X64-NOBMI-LABEL: bzhi64_b1_indexzext: @@ -1525,63 +1521,65 @@ define i64 @bzhi64_b2_load(i64* %w, i64 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi64_b2_load: ; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $-1, %edx -; X86-NOBMI-NEXT: movl $-1, %eax -; X86-NOBMI-NEXT: shll %cl, %eax -; X86-NOBMI-NEXT: shldl %cl, %edx, %edx -; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: je .LBB22_2 -; X86-NOBMI-NEXT: # %bb.1: -; X86-NOBMI-NEXT: movl %eax, %edx +; X86-NOBMI-NEXT: movl $-1, %edi +; X86-NOBMI-NEXT: shll %cl, %edi ; X86-NOBMI-NEXT: xorl %eax, %eax -; X86-NOBMI-NEXT: .LBB22_2: +; X86-NOBMI-NEXT: testb $32, %cl +; X86-NOBMI-NEXT: jne .LBB22_1 +; X86-NOBMI-NEXT: # %bb.2: +; X86-NOBMI-NEXT: movl %edi, %eax +; X86-NOBMI-NEXT: jmp .LBB22_3 +; X86-NOBMI-NEXT: .LBB22_1: +; X86-NOBMI-NEXT: movl %edi, %edx +; X86-NOBMI-NEXT: .LBB22_3: ; X86-NOBMI-NEXT: notl %edx ; X86-NOBMI-NEXT: notl %eax -; X86-NOBMI-NEXT: andl 4(%esi), %edx ; X86-NOBMI-NEXT: andl (%esi), %eax +; X86-NOBMI-NEXT: andl 4(%esi), %edx ; X86-NOBMI-NEXT: popl %esi +; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bzhi64_b2_load: ; X86-BMI1NOTBM: # %bb.0: ; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %edx ; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: shll %cl, %esi -; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %edx +; X86-BMI1NOTBM-NEXT: movl $-1, %eax +; X86-BMI1NOTBM-NEXT: shll %cl, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB22_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi +; X86-BMI1NOTBM-NEXT: movl %eax, %esi +; X86-BMI1NOTBM-NEXT: xorl %eax, %eax ; X86-BMI1NOTBM-NEXT: .LBB22_2: -; X86-BMI1NOTBM-NEXT: andnl 4(%eax), %edx, %edx -; X86-BMI1NOTBM-NEXT: andnl (%eax), %esi, %eax +; X86-BMI1NOTBM-NEXT: andnl (%edx), %eax, %eax +; X86-BMI1NOTBM-NEXT: andnl 4(%edx), %esi, %edx ; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bzhi64_b2_load: ; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1BMI2-NEXT: pushl %ebx +; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl ; X86-BMI1BMI2-NEXT: movl $-1, %edx -; X86-BMI1BMI2-NEXT: shlxl %ecx, %edx, %esi -; X86-BMI1BMI2-NEXT: shldl %cl, %edx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl +; X86-BMI1BMI2-NEXT: shlxl %ebx, %edx, %eax +; X86-BMI1BMI2-NEXT: testb $32, %bl ; X86-BMI1BMI2-NEXT: je .LBB22_2 ; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %esi, %edx -; X86-BMI1BMI2-NEXT: xorl %esi, %esi +; X86-BMI1BMI2-NEXT: movl %eax, %edx +; X86-BMI1BMI2-NEXT: xorl %eax, %eax ; X86-BMI1BMI2-NEXT: .LBB22_2: -; X86-BMI1BMI2-NEXT: andnl 4(%eax), %edx, %edx -; X86-BMI1BMI2-NEXT: andnl (%eax), %esi, %eax -; X86-BMI1BMI2-NEXT: popl %esi +; X86-BMI1BMI2-NEXT: andnl (%ecx), %eax, %eax +; X86-BMI1BMI2-NEXT: andnl 4(%ecx), %edx, %edx +; X86-BMI1BMI2-NEXT: popl %ebx ; X86-BMI1BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_b2_load: @@ -1614,63 +1612,65 @@ define i64 @bzhi64_b3_load_indexzext(i64* %w, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI-LABEL: 
bzhi64_b3_load_indexzext: ; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $-1, %edx -; X86-NOBMI-NEXT: movl $-1, %eax -; X86-NOBMI-NEXT: shll %cl, %eax -; X86-NOBMI-NEXT: shldl %cl, %edx, %edx -; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: je .LBB23_2 -; X86-NOBMI-NEXT: # %bb.1: -; X86-NOBMI-NEXT: movl %eax, %edx +; X86-NOBMI-NEXT: movl $-1, %edi +; X86-NOBMI-NEXT: shll %cl, %edi ; X86-NOBMI-NEXT: xorl %eax, %eax -; X86-NOBMI-NEXT: .LBB23_2: +; X86-NOBMI-NEXT: testb $32, %cl +; X86-NOBMI-NEXT: jne .LBB23_1 +; X86-NOBMI-NEXT: # %bb.2: +; X86-NOBMI-NEXT: movl %edi, %eax +; X86-NOBMI-NEXT: jmp .LBB23_3 +; X86-NOBMI-NEXT: .LBB23_1: +; X86-NOBMI-NEXT: movl %edi, %edx +; X86-NOBMI-NEXT: .LBB23_3: ; X86-NOBMI-NEXT: notl %edx ; X86-NOBMI-NEXT: notl %eax -; X86-NOBMI-NEXT: andl 4(%esi), %edx ; X86-NOBMI-NEXT: andl (%esi), %eax +; X86-NOBMI-NEXT: andl 4(%esi), %edx ; X86-NOBMI-NEXT: popl %esi +; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bzhi64_b3_load_indexzext: ; X86-BMI1NOTBM: # %bb.0: ; X86-BMI1NOTBM-NEXT: pushl %esi -; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %edx ; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: shll %cl, %esi -; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %edx +; X86-BMI1NOTBM-NEXT: movl $-1, %eax +; X86-BMI1NOTBM-NEXT: shll %cl, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB23_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %esi, %edx -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi +; X86-BMI1NOTBM-NEXT: movl %eax, %esi +; X86-BMI1NOTBM-NEXT: xorl %eax, %eax ; X86-BMI1NOTBM-NEXT: .LBB23_2: -; X86-BMI1NOTBM-NEXT: andnl 4(%eax), %edx, %edx -; X86-BMI1NOTBM-NEXT: andnl (%eax), %esi, %eax +; X86-BMI1NOTBM-NEXT: andnl (%edx), %eax, %eax +; X86-BMI1NOTBM-NEXT: andnl 4(%edx), %esi, %edx ; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bzhi64_b3_load_indexzext: ; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1BMI2-NEXT: pushl %ebx +; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl ; X86-BMI1BMI2-NEXT: movl $-1, %edx -; X86-BMI1BMI2-NEXT: shlxl %ecx, %edx, %esi -; X86-BMI1BMI2-NEXT: shldl %cl, %edx, %edx -; X86-BMI1BMI2-NEXT: testb $32, %cl +; X86-BMI1BMI2-NEXT: shlxl %ebx, %edx, %eax +; X86-BMI1BMI2-NEXT: testb $32, %bl ; X86-BMI1BMI2-NEXT: je .LBB23_2 ; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %esi, %edx -; X86-BMI1BMI2-NEXT: xorl %esi, %esi +; X86-BMI1BMI2-NEXT: movl %eax, %edx +; X86-BMI1BMI2-NEXT: xorl %eax, %eax ; X86-BMI1BMI2-NEXT: .LBB23_2: -; X86-BMI1BMI2-NEXT: andnl 4(%eax), %edx, %edx -; X86-BMI1BMI2-NEXT: andnl (%eax), %esi, %eax -; X86-BMI1BMI2-NEXT: popl %esi +; X86-BMI1BMI2-NEXT: andnl (%ecx), %eax, %eax +; X86-BMI1BMI2-NEXT: andnl 4(%ecx), %edx, %edx +; X86-BMI1BMI2-NEXT: popl %ebx ; X86-BMI1BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_b3_load_indexzext: @@ -1706,58 +1706,56 @@ define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi64_b4_commutative: ; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl 
$-1, %edx -; X86-NOBMI-NEXT: movl $-1, %eax -; X86-NOBMI-NEXT: shll %cl, %eax -; X86-NOBMI-NEXT: shldl %cl, %edx, %edx -; X86-NOBMI-NEXT: testb $32, %cl -; X86-NOBMI-NEXT: je .LBB24_2 -; X86-NOBMI-NEXT: # %bb.1: -; X86-NOBMI-NEXT: movl %eax, %edx +; X86-NOBMI-NEXT: movl $-1, %esi +; X86-NOBMI-NEXT: shll %cl, %esi ; X86-NOBMI-NEXT: xorl %eax, %eax -; X86-NOBMI-NEXT: .LBB24_2: +; X86-NOBMI-NEXT: testb $32, %cl +; X86-NOBMI-NEXT: jne .LBB24_1 +; X86-NOBMI-NEXT: # %bb.2: +; X86-NOBMI-NEXT: movl %esi, %eax +; X86-NOBMI-NEXT: jmp .LBB24_3 +; X86-NOBMI-NEXT: .LBB24_1: +; X86-NOBMI-NEXT: movl %esi, %edx +; X86-NOBMI-NEXT: .LBB24_3: ; X86-NOBMI-NEXT: notl %edx ; X86-NOBMI-NEXT: notl %eax -; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: retl ; ; X86-BMI1NOTBM-LABEL: bzhi64_b4_commutative: ; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi ; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1NOTBM-NEXT: movl $-1, %edx ; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: shll %cl, %esi -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %eax +; X86-BMI1NOTBM-NEXT: shll %cl, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB24_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi +; X86-BMI1NOTBM-NEXT: movl %eax, %edx +; X86-BMI1NOTBM-NEXT: xorl %eax, %eax ; X86-BMI1NOTBM-NEXT: .LBB24_2: -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %edx -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %esi, %eax -; X86-BMI1NOTBM-NEXT: popl %esi +; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax +; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bzhi64_b4_commutative: ; X86-BMI1BMI2: # %bb.0: -; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $-1, %eax -; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %esi -; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl +; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI1BMI2-NEXT: movl $-1, %ecx +; X86-BMI1BMI2-NEXT: shlxl %edx, %ecx, %eax +; X86-BMI1BMI2-NEXT: testb $32, %dl ; X86-BMI1BMI2-NEXT: je .LBB24_2 ; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: xorl %esi, %esi +; X86-BMI1BMI2-NEXT: movl %eax, %ecx +; X86-BMI1BMI2-NEXT: xorl %eax, %eax ; X86-BMI1BMI2-NEXT: .LBB24_2: -; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %edx -; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %esi, %eax -; X86-BMI1BMI2-NEXT: popl %esi +; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax +; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx ; X86-BMI1BMI2-NEXT: retl ; ; X64-NOBMI-LABEL: bzhi64_b4_commutative: @@ -2628,7 +2626,6 @@ ; X86-NOBMI-NEXT: movl $-1, %esi ; X86-NOBMI-NEXT: movl $-1, %edi ; X86-NOBMI-NEXT: shrl %cl, %edi -; X86-NOBMI-NEXT: shrdl %cl, %esi, %esi ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB34_2 ; X86-NOBMI-NEXT: # %bb.1: @@ -2659,7 +2656,6 @@ ; X86-BMI1NOTBM-NEXT: movl $-1, %esi ; X86-BMI1NOTBM-NEXT: movl $-1, %edi ; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %esi ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB34_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: @@ -2685,26 +2681,25 @@ ; X86-BMI1BMI2-NEXT: pushl %edi ; X86-BMI1BMI2-NEXT: pushl %esi ; X86-BMI1BMI2-NEXT: 
pushl %eax -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $-1, %esi -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edi -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %esi -; X86-BMI1BMI2-NEXT: testb $32, %cl +; X86-BMI1BMI2-NEXT: movb $64, %al +; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-BMI1BMI2-NEXT: movl $-1, %edi +; X86-BMI1BMI2-NEXT: shrxl %eax, %edi, %esi +; X86-BMI1BMI2-NEXT: testb $32, %al ; X86-BMI1BMI2-NEXT: je .LBB34_2 ; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: xorl %edi, %edi +; X86-BMI1BMI2-NEXT: movl %esi, %edi +; X86-BMI1BMI2-NEXT: xorl %esi, %esi ; X86-BMI1BMI2-NEXT: .LBB34_2: ; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %edi ; X86-BMI1BMI2-NEXT: pushl %esi +; X86-BMI1BMI2-NEXT: pushl %edi ; X86-BMI1BMI2-NEXT: calll use64 ; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi ; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: movl %edi, %edx +; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-BMI1BMI2-NEXT: movl %edi, %eax +; X86-BMI1BMI2-NEXT: movl %esi, %edx ; X86-BMI1BMI2-NEXT: addl $4, %esp ; X86-BMI1BMI2-NEXT: popl %esi ; X86-BMI1BMI2-NEXT: popl %edi @@ -2785,7 +2780,6 @@ ; X86-NOBMI-NEXT: movl $-1, %esi ; X86-NOBMI-NEXT: movl $-1, %edi ; X86-NOBMI-NEXT: shrl %cl, %edi -; X86-NOBMI-NEXT: shrdl %cl, %esi, %esi ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB35_2 ; X86-NOBMI-NEXT: # %bb.1: @@ -2816,7 +2810,6 @@ ; X86-BMI1NOTBM-NEXT: movl $-1, %esi ; X86-BMI1NOTBM-NEXT: movl $-1, %edi ; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %esi ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB35_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: @@ -2842,26 +2835,25 @@ ; X86-BMI1BMI2-NEXT: pushl %edi ; X86-BMI1BMI2-NEXT: pushl %esi ; X86-BMI1BMI2-NEXT: pushl %eax -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $-1, %esi -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edi -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %esi -; X86-BMI1BMI2-NEXT: testb $32, %cl +; X86-BMI1BMI2-NEXT: movb $64, %al +; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-BMI1BMI2-NEXT: movl $-1, %edi +; X86-BMI1BMI2-NEXT: shrxl %eax, %edi, %esi +; X86-BMI1BMI2-NEXT: testb $32, %al ; X86-BMI1BMI2-NEXT: je .LBB35_2 ; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: xorl %edi, %edi +; X86-BMI1BMI2-NEXT: movl %esi, %edi +; X86-BMI1BMI2-NEXT: xorl %esi, %esi ; X86-BMI1BMI2-NEXT: .LBB35_2: ; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %edi ; X86-BMI1BMI2-NEXT: pushl %esi +; X86-BMI1BMI2-NEXT: pushl %edi ; X86-BMI1BMI2-NEXT: calll use64 ; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi ; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: movl %edi, %edx +; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-BMI1BMI2-NEXT: movl %edi, %eax +; X86-BMI1BMI2-NEXT: movl %esi, %edx ; X86-BMI1BMI2-NEXT: addl $4, %esp ; X86-BMI1BMI2-NEXT: popl %esi ; X86-BMI1BMI2-NEXT: popl %edi @@ -2944,24 +2936,23 @@ ; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: movl $-1, %ebx ; X86-NOBMI-NEXT: shrl %cl, %ebx -; X86-NOBMI-NEXT: shrdl %cl, %eax, %eax ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB36_2 ; X86-NOBMI-NEXT: # %bb.1: ; X86-NOBMI-NEXT: movl %ebx, %eax ; 
X86-NOBMI-NEXT: xorl %ebx, %ebx ; X86-NOBMI-NEXT: .LBB36_2: -; X86-NOBMI-NEXT: movl (%edx), %esi -; X86-NOBMI-NEXT: andl %eax, %esi -; X86-NOBMI-NEXT: movl 4(%edx), %edi -; X86-NOBMI-NEXT: andl %ebx, %edi +; X86-NOBMI-NEXT: movl 4(%edx), %esi +; X86-NOBMI-NEXT: andl %ebx, %esi +; X86-NOBMI-NEXT: movl (%edx), %edi +; X86-NOBMI-NEXT: andl %eax, %edi ; X86-NOBMI-NEXT: subl $8, %esp ; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %eax ; X86-NOBMI-NEXT: calll use64 ; X86-NOBMI-NEXT: addl $16, %esp -; X86-NOBMI-NEXT: movl %esi, %eax -; X86-NOBMI-NEXT: movl %edi, %edx +; X86-NOBMI-NEXT: movl %edi, %eax +; X86-NOBMI-NEXT: movl %esi, %edx ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: popl %ebx @@ -2978,24 +2969,23 @@ ; X86-BMI1NOTBM-NEXT: movl $-1, %eax ; X86-BMI1NOTBM-NEXT: movl $-1, %ebx ; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB36_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: ; X86-BMI1NOTBM-NEXT: movl %ebx, %eax ; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx ; X86-BMI1NOTBM-NEXT: .LBB36_2: -; X86-BMI1NOTBM-NEXT: movl (%edx), %esi -; X86-BMI1NOTBM-NEXT: andl %eax, %esi -; X86-BMI1NOTBM-NEXT: movl 4(%edx), %edi -; X86-BMI1NOTBM-NEXT: andl %ebx, %edi +; X86-BMI1NOTBM-NEXT: movl 4(%edx), %esi +; X86-BMI1NOTBM-NEXT: andl %ebx, %esi +; X86-BMI1NOTBM-NEXT: movl (%edx), %edi +; X86-BMI1NOTBM-NEXT: andl %eax, %edi ; X86-BMI1NOTBM-NEXT: subl $8, %esp ; X86-BMI1NOTBM-NEXT: pushl %ebx ; X86-BMI1NOTBM-NEXT: pushl %eax ; X86-BMI1NOTBM-NEXT: calll use64 ; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: movl %edi, %edx +; X86-BMI1NOTBM-NEXT: movl %edi, %eax +; X86-BMI1NOTBM-NEXT: movl %esi, %edx ; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: popl %edi ; X86-BMI1NOTBM-NEXT: popl %ebx @@ -3006,29 +2996,28 @@ ; X86-BMI1BMI2-NEXT: pushl %ebx ; X86-BMI1BMI2-NEXT: pushl %edi ; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $-1, %eax -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %ebx -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl +; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1BMI2-NEXT: movb $64, %bl +; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %bl +; X86-BMI1BMI2-NEXT: movl $-1, %ecx +; X86-BMI1BMI2-NEXT: shrxl %ebx, %ecx, %edx +; X86-BMI1BMI2-NEXT: testb $32, %bl ; X86-BMI1BMI2-NEXT: je .LBB36_2 ; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %ebx, %eax -; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI1BMI2-NEXT: movl %edx, %ecx +; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: .LBB36_2: -; X86-BMI1BMI2-NEXT: movl (%edx), %esi -; X86-BMI1BMI2-NEXT: andl %eax, %esi -; X86-BMI1BMI2-NEXT: movl 4(%edx), %edi -; X86-BMI1BMI2-NEXT: andl %ebx, %edi +; X86-BMI1BMI2-NEXT: movl 4(%eax), %esi +; X86-BMI1BMI2-NEXT: andl %edx, %esi +; X86-BMI1BMI2-NEXT: movl (%eax), %edi +; X86-BMI1BMI2-NEXT: andl %ecx, %edi ; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %eax +; X86-BMI1BMI2-NEXT: pushl %edx +; X86-BMI1BMI2-NEXT: pushl %ecx ; X86-BMI1BMI2-NEXT: calll use64 ; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: movl %edi, %edx +; X86-BMI1BMI2-NEXT: movl %edi, %eax +; X86-BMI1BMI2-NEXT: movl %esi, %edx ; X86-BMI1BMI2-NEXT: popl %esi ; X86-BMI1BMI2-NEXT: popl %edi ; 
X86-BMI1BMI2-NEXT: popl %ebx @@ -3098,24 +3087,23 @@ ; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: movl $-1, %ebx ; X86-NOBMI-NEXT: shrl %cl, %ebx -; X86-NOBMI-NEXT: shrdl %cl, %eax, %eax ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB37_2 ; X86-NOBMI-NEXT: # %bb.1: ; X86-NOBMI-NEXT: movl %ebx, %eax ; X86-NOBMI-NEXT: xorl %ebx, %ebx ; X86-NOBMI-NEXT: .LBB37_2: -; X86-NOBMI-NEXT: movl (%edx), %esi -; X86-NOBMI-NEXT: andl %eax, %esi -; X86-NOBMI-NEXT: movl 4(%edx), %edi -; X86-NOBMI-NEXT: andl %ebx, %edi +; X86-NOBMI-NEXT: movl 4(%edx), %esi +; X86-NOBMI-NEXT: andl %ebx, %esi +; X86-NOBMI-NEXT: movl (%edx), %edi +; X86-NOBMI-NEXT: andl %eax, %edi ; X86-NOBMI-NEXT: subl $8, %esp ; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %eax ; X86-NOBMI-NEXT: calll use64 ; X86-NOBMI-NEXT: addl $16, %esp -; X86-NOBMI-NEXT: movl %esi, %eax -; X86-NOBMI-NEXT: movl %edi, %edx +; X86-NOBMI-NEXT: movl %edi, %eax +; X86-NOBMI-NEXT: movl %esi, %edx ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi ; X86-NOBMI-NEXT: popl %ebx @@ -3132,24 +3120,23 @@ ; X86-BMI1NOTBM-NEXT: movl $-1, %eax ; X86-BMI1NOTBM-NEXT: movl $-1, %ebx ; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx -; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB37_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: ; X86-BMI1NOTBM-NEXT: movl %ebx, %eax ; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx ; X86-BMI1NOTBM-NEXT: .LBB37_2: -; X86-BMI1NOTBM-NEXT: movl (%edx), %esi -; X86-BMI1NOTBM-NEXT: andl %eax, %esi -; X86-BMI1NOTBM-NEXT: movl 4(%edx), %edi -; X86-BMI1NOTBM-NEXT: andl %ebx, %edi +; X86-BMI1NOTBM-NEXT: movl 4(%edx), %esi +; X86-BMI1NOTBM-NEXT: andl %ebx, %esi +; X86-BMI1NOTBM-NEXT: movl (%edx), %edi +; X86-BMI1NOTBM-NEXT: andl %eax, %edi ; X86-BMI1NOTBM-NEXT: subl $8, %esp ; X86-BMI1NOTBM-NEXT: pushl %ebx ; X86-BMI1NOTBM-NEXT: pushl %eax ; X86-BMI1NOTBM-NEXT: calll use64 ; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: movl %edi, %edx +; X86-BMI1NOTBM-NEXT: movl %edi, %eax +; X86-BMI1NOTBM-NEXT: movl %esi, %edx ; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: popl %edi ; X86-BMI1NOTBM-NEXT: popl %ebx @@ -3160,29 +3147,28 @@ ; X86-BMI1BMI2-NEXT: pushl %ebx ; X86-BMI1BMI2-NEXT: pushl %edi ; X86-BMI1BMI2-NEXT: pushl %esi -; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $-1, %eax -; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %ebx -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %eax -; X86-BMI1BMI2-NEXT: testb $32, %cl +; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1BMI2-NEXT: movb $64, %bl +; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %bl +; X86-BMI1BMI2-NEXT: movl $-1, %ecx +; X86-BMI1BMI2-NEXT: shrxl %ebx, %ecx, %edx +; X86-BMI1BMI2-NEXT: testb $32, %bl ; X86-BMI1BMI2-NEXT: je .LBB37_2 ; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %ebx, %eax -; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI1BMI2-NEXT: movl %edx, %ecx +; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: .LBB37_2: -; X86-BMI1BMI2-NEXT: movl (%edx), %esi -; X86-BMI1BMI2-NEXT: andl %eax, %esi -; X86-BMI1BMI2-NEXT: movl 4(%edx), %edi -; X86-BMI1BMI2-NEXT: andl %ebx, %edi +; X86-BMI1BMI2-NEXT: movl 4(%eax), %esi +; X86-BMI1BMI2-NEXT: andl %edx, %esi +; X86-BMI1BMI2-NEXT: movl (%eax), %edi +; X86-BMI1BMI2-NEXT: andl %ecx, %edi ; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %ebx -; X86-BMI1BMI2-NEXT: pushl %eax +; X86-BMI1BMI2-NEXT: pushl %edx +; 
X86-BMI1BMI2-NEXT: pushl %ecx ; X86-BMI1BMI2-NEXT: calll use64 ; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: movl %edi, %edx +; X86-BMI1BMI2-NEXT: movl %edi, %eax +; X86-BMI1BMI2-NEXT: movl %esi, %edx ; X86-BMI1BMI2-NEXT: popl %esi ; X86-BMI1BMI2-NEXT: popl %edi ; X86-BMI1BMI2-NEXT: popl %ebx @@ -3253,7 +3239,6 @@ ; X86-NOBMI-NEXT: movl $-1, %esi ; X86-NOBMI-NEXT: movl $-1, %edi ; X86-NOBMI-NEXT: shrl %cl, %edi -; X86-NOBMI-NEXT: shrdl %cl, %esi, %esi ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB38_2 ; X86-NOBMI-NEXT: # %bb.1: @@ -3284,7 +3269,6 @@ ; X86-BMI1NOTBM-NEXT: movl $-1, %esi ; X86-BMI1NOTBM-NEXT: movl $-1, %edi ; X86-BMI1NOTBM-NEXT: shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %esi ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB38_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: @@ -3310,26 +3294,25 @@ ; X86-BMI1BMI2-NEXT: pushl %edi ; X86-BMI1BMI2-NEXT: pushl %esi ; X86-BMI1BMI2-NEXT: pushl %eax -; X86-BMI1BMI2-NEXT: movb $64, %cl -; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl $-1, %esi -; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edi -; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %esi -; X86-BMI1BMI2-NEXT: testb $32, %cl +; X86-BMI1BMI2-NEXT: movb $64, %al +; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-BMI1BMI2-NEXT: movl $-1, %edi +; X86-BMI1BMI2-NEXT: shrxl %eax, %edi, %esi +; X86-BMI1BMI2-NEXT: testb $32, %al ; X86-BMI1BMI2-NEXT: je .LBB38_2 ; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: movl %edi, %esi -; X86-BMI1BMI2-NEXT: xorl %edi, %edi +; X86-BMI1BMI2-NEXT: movl %esi, %edi +; X86-BMI1BMI2-NEXT: xorl %esi, %esi ; X86-BMI1BMI2-NEXT: .LBB38_2: ; X86-BMI1BMI2-NEXT: subl $8, %esp -; X86-BMI1BMI2-NEXT: pushl %edi ; X86-BMI1BMI2-NEXT: pushl %esi +; X86-BMI1BMI2-NEXT: pushl %edi ; X86-BMI1BMI2-NEXT: calll use64 ; X86-BMI1BMI2-NEXT: addl $16, %esp -; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi ; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi -; X86-BMI1BMI2-NEXT: movl %esi, %eax -; X86-BMI1BMI2-NEXT: movl %edi, %edx +; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-BMI1BMI2-NEXT: movl %edi, %eax +; X86-BMI1BMI2-NEXT: movl %esi, %edx ; X86-BMI1BMI2-NEXT: addl $4, %esp ; X86-BMI1BMI2-NEXT: popl %esi ; X86-BMI1BMI2-NEXT: popl %edi @@ -3407,14 +3390,12 @@ ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: movb $64, %cl ; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-NOBMI-NEXT: movl $-1, %edx ; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: shrl %cl, %eax -; X86-NOBMI-NEXT: shrdl %cl, %edx, %edx ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: jne .LBB39_2 ; X86-NOBMI-NEXT: # %bb.1: -; X86-NOBMI-NEXT: movl %edx, %eax +; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: .LBB39_2: ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl @@ -3423,14 +3404,12 @@ ; X86-BMI1NOTBM: # %bb.0: ; X86-BMI1NOTBM-NEXT: movb $64, %cl ; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %edx ; X86-BMI1NOTBM-NEXT: movl $-1, %eax ; X86-BMI1NOTBM-NEXT: shrl %cl, %eax -; X86-BMI1NOTBM-NEXT: shrdl %cl, %edx, %edx ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: jne .LBB39_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %edx, %eax +; X86-BMI1NOTBM-NEXT: movl $-1, %eax ; X86-BMI1NOTBM-NEXT: .LBB39_2: ; X86-BMI1NOTBM-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-BMI1NOTBM-NEXT: retl @@ -3439,13 +3418,11 @@ ; X86-BMI1BMI2: # %bb.0: ; X86-BMI1BMI2-NEXT: movb $64, %cl ; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1BMI2-NEXT: movl 
$-1, %edx ; X86-BMI1BMI2-NEXT: movl $-1, %eax -; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %eax ; X86-BMI1BMI2-NEXT: testb $32, %cl ; X86-BMI1BMI2-NEXT: je .LBB39_2 ; X86-BMI1BMI2-NEXT: # %bb.1: -; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %eax +; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %eax ; X86-BMI1BMI2-NEXT: .LBB39_2: ; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-BMI1BMI2-NEXT: retl diff --git a/llvm/test/CodeGen/X86/fshl.ll b/llvm/test/CodeGen/X86/fshl.ll --- a/llvm/test/CodeGen/X86/fshl.ll +++ b/llvm/test/CodeGen/X86/fshl.ll @@ -587,14 +587,9 @@ define i64 @combine_fshl_load_i64(i64* %p) nounwind { ; X86-FAST-LABEL: combine_fshl_load_i64: ; X86-FAST: # %bb.0: -; X86-FAST-NEXT: pushl %esi ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-FAST-NEXT: movl 12(%ecx), %eax -; X86-FAST-NEXT: movl 16(%ecx), %esi -; X86-FAST-NEXT: movl 20(%ecx), %edx -; X86-FAST-NEXT: shldl $24, %esi, %edx -; X86-FAST-NEXT: shrdl $8, %esi, %eax -; X86-FAST-NEXT: popl %esi +; X86-FAST-NEXT: movl 13(%ecx), %eax +; X86-FAST-NEXT: movl 17(%ecx), %edx ; X86-FAST-NEXT: retl ; ; X86-SLOW-LABEL: combine_fshl_load_i64: diff --git a/llvm/test/CodeGen/X86/fshr.ll b/llvm/test/CodeGen/X86/fshr.ll --- a/llvm/test/CodeGen/X86/fshr.ll +++ b/llvm/test/CodeGen/X86/fshr.ll @@ -582,16 +582,9 @@ define i64 @combine_fshr_load_i64(i64* %p) nounwind { ; X86-FAST-LABEL: combine_fshr_load_i64: ; X86-FAST: # %bb.0: -; X86-FAST-NEXT: pushl %esi -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-FAST-NEXT: movzbl 11(%eax), %ecx -; X86-FAST-NEXT: movl 12(%eax), %esi -; X86-FAST-NEXT: movl 16(%eax), %edx -; X86-FAST-NEXT: shldl $8, %esi, %edx -; X86-FAST-NEXT: movl %esi, %eax -; X86-FAST-NEXT: shll $8, %eax -; X86-FAST-NEXT: orl %ecx, %eax -; X86-FAST-NEXT: popl %esi +; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-FAST-NEXT: movl 11(%ecx), %eax +; X86-FAST-NEXT: movl 15(%ecx), %edx ; X86-FAST-NEXT: retl ; ; X86-SLOW-LABEL: combine_fshr_load_i64: diff --git a/llvm/test/CodeGen/X86/shift-combine.ll b/llvm/test/CodeGen/X86/shift-combine.ll --- a/llvm/test/CodeGen/X86/shift-combine.ll +++ b/llvm/test/CodeGen/X86/shift-combine.ll @@ -290,7 +290,6 @@ ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: shrdl $8, %edx, %eax ; X32-NEXT: shrl $8, %edx ; X32-NEXT: incl %edx ; X32-NEXT: shrdl $8, %edx, %eax diff --git a/llvm/test/CodeGen/X86/shift-parts.ll b/llvm/test/CodeGen/X86/shift-parts.ll --- a/llvm/test/CodeGen/X86/shift-parts.ll +++ b/llvm/test/CodeGen/X86/shift-parts.ll @@ -10,15 +10,14 @@ ; CHECK-LABEL: int87: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq g_144+{{.*}}(%rip), %rax -; CHECK-NEXT: movq g_144+{{.*}}(%rip), %rdx -; CHECK-NEXT: movzbl %sil, %ecx -; CHECK-NEXT: shll $6, %ecx +; CHECK-NEXT: movq g_144+{{.*}}(%rip), %rcx +; CHECK-NEXT: movzbl %sil, %edx +; CHECK-NEXT: shll $6, %edx ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_1: # %for.cond ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movq %rdx, %rsi -; CHECK-NEXT: shrdq %cl, %rax, %rsi -; CHECK-NEXT: testb $64, %cl +; CHECK-NEXT: testb $64, %dl +; CHECK-NEXT: movq %rcx, %rsi ; CHECK-NEXT: cmovneq %rax, %rsi ; CHECK-NEXT: orl $0, %esi ; CHECK-NEXT: je .LBB0_1