diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -929,11 +929,19 @@ return RepRegClassCostForVT[VT.SimpleTy]; } - /// Return true if SHIFT instructions should be expanded to SHIFT_PARTS - /// instructions, and false if a library call is preferred (e.g for code-size - /// reasons). - virtual bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const { - return true; + /// Return the preferred strategy to legalize this SHIFT instruction, with + /// \p ExpansionFactor being the recursion depth - the number of expansions. + enum class ShiftLegalizationStrategy { + ExpandToParts, + ExpandThroughStack, + LowerToLibcall + }; + virtual ShiftLegalizationStrategy + preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, + unsigned ExpansionFactor) const { + if (ExpansionFactor == 1) + return ShiftLegalizationStrategy::ExpandToParts; + return ShiftLegalizationStrategy::ExpandThroughStack; } /// Return true if the target has native support for the specified value type. 
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -184,6 +184,7 @@ SDValue ExpandExtractFromVectorThroughStack(SDValue Op); SDValue ExpandInsertToVectorThroughStack(SDValue Op); SDValue ExpandVectorBuildThroughStack(SDNode* Node); + SDValue ExpandScalarSrlThroughStack(SDNode *Node); SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP); SDValue ExpandConstant(ConstantSDNode *CP); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -4110,6 +4110,92 @@ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } +bool DAGTypeLegalizer::ExpandIntRes_ShiftThroughStack(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + SDValue Shiftee = N->getOperand(0); + EVT VT = Shiftee.getValueType(); + SDValue ShAmt = N->getOperand(1); + EVT ShAmtVT = ShAmt.getValueType(); + uint64_t ShAmtVTBitWidth = ShAmtVT.getScalarSizeInBits(); + assert(ShAmtVTBitWidth % 8 == 0 && "Shift amount type is not byte multiple?"); + + // This legalization only works if the shift is by a multiple of byte width, + // %x * 8 <-> %x << 3 so 3 low bits *must* be known zero. + if (DAG.computeKnownBits(ShAmt).countMinTrailingZeros() < 3) + return false; + + uint64_t VTBitWidth = VT.getScalarSizeInBits(); + assert(VTBitWidth % 8 == 0 && "Shifting a not byte multiple value?"); + uint64_t VTByteWidth = VTBitWidth / 8; + uint64_t StackSlotByteWidth = 2 * VTByteWidth; + + // Get a temporary stack slot 2x the width of our VT. + // FIXME: reuse stack slots? 
+ SDValue StackPtr = DAG.CreateStackTemporary( + TypeSize::getFixed(StackSlotByteWidth), Align(1)); + SDValue Ch = DAG.getEntryNode(); + + SDValue StackSlotSpillHalf = StackPtr; + SDValue StackSlotPaddingHalf = DAG.getMemBasePlusOffset( + StackPtr, DAG.getConstant(VTByteWidth, dl, StackPtr.getValueType()), dl); + + // We have exactly two strategies here: + // 1. spill into low half, pad high half, index upwards into low half + // 2. spill into high half, pad low half, index downwards into low half + // We pick 1. for right shifts and 2. for left-shift on little-endian machine, + // or vice versa on big-endian machine. + bool ShouldSpillToHighHalf = N->getOpcode() == ISD::SHL; + if (DAG.getDataLayout().isBigEndian()) + ShouldSpillToHighHalf = !ShouldSpillToHighHalf; + + if (ShouldSpillToHighHalf) + std::swap(StackSlotSpillHalf, StackSlotPaddingHalf); + + // Pad one of the stack slot halves. + SDValue AllZeros = DAG.getConstant(0, dl, VT); + SDValue Padding; + if (N->getOpcode() != ISD::SRA) { + // For logical shifts, padding is all-zeros. + Padding = AllZeros; + } else { + // For arithmetic right-shift, padding is sign bit of the value. + SDValue ShifteeIsNegative = + DAG.getSetCC(dl, MVT::i1, Shiftee, AllZeros, ISD::SETLT); + Padding = DAG.getNode(ISD::SIGN_EXTEND, dl, VT, ShifteeIsNegative); + } + Ch = DAG.getStore(Ch, dl, Padding, StackSlotPaddingHalf, MachinePointerInfo(), + Align(1)); + // And spill the value, that is being shifted, into another half. + Ch = DAG.getStore(Ch, dl, Shiftee, StackSlotSpillHalf, MachinePointerInfo(), + Align(1)); + + // Now, compute the byte offset into stack slot. We have shift amount, + // which is in bits, but in multiples of byte. So just divide by CHAR_BIT. + SDNodeFlags Flags; + Flags.setExact(true); + SDValue ByteOffset = DAG.getNode(ISD::SRL, dl, ShAmtVT, ShAmt, + DAG.getConstant(3, dl, ShAmtVT), Flags); + // And clamp it, because OOB load is an immediate UB, + // while shift overflow would have *just* been poison. 
+ // NOTE: we can clamp to either VTByteWidth or VTByteWidth-1. + ByteOffset = DAG.getNode(ISD::UMIN, dl, ShAmtVT, ByteOffset, + DAG.getConstant(VTByteWidth, dl, ShAmtVT)); + if (ShouldSpillToHighHalf) + ByteOffset = DAG.getNegative(ByteOffset, dl, ShAmtVT); + + // Get the load pointer; the offset may be negative, so sign-extend it. + ByteOffset = DAG.getSExtOrTrunc(ByteOffset, dl, StackPtr.getValueType()); + SDValue AdjStackPtr = + DAG.getMemBasePlusOffset(StackSlotSpillHalf, ByteOffset, dl); + // And load it! While the load is not legal, legalizing it is obvious. + SDValue Res = + DAG.getLoad(VT, dl, Ch, AdjStackPtr, MachinePointerInfo(), Align(1)); + // Finally, split the loaded value. + SplitInteger(Res, Lo, Hi); + return true; +} + void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); @@ -4145,7 +4231,25 @@ (Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) || Action == TargetLowering::Custom; - if (LegalOrCustom && TLI.shouldExpandShift(DAG, N)) { + unsigned ExpansionFactor = 1; + // That VT->NVT expansion is one step. But will we re-expand NVT? + for (EVT TmpVT = NVT;;) { + EVT NewTMPVT = TLI.getTypeToTransformTo(*DAG.getContext(), TmpVT); + if (NewTMPVT == TmpVT) + break; + TmpVT = NewTMPVT; + ++ExpansionFactor; + } + + TargetLowering::ShiftLegalizationStrategy S = + TLI.preferredShiftLegalizationStrategy(DAG, N, ExpansionFactor); + + if (S == TargetLowering::ShiftLegalizationStrategy::ExpandThroughStack) + if (ExpandIntRes_ShiftThroughStack(N, Lo, Hi)) + return; + + if (LegalOrCustom && + S != TargetLowering::ShiftLegalizationStrategy::LowerToLibcall) { // Expand the subcomponents. 
SDValue LHSL, LHSH; GetExpandedInteger(N->getOperand(0), LHSL, LHSH); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -457,6 +457,7 @@ void ExpandIntRes_SREM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_UDIV (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi); + bool ExpandIntRes_ShiftThroughStack(SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_MINMAX (SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -799,7 +799,9 @@ unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override; - bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override; + ShiftLegalizationStrategy + preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, + unsigned ExpansionFactor) const override; bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override { diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -22424,12 +22424,14 @@ return X.getValueType().isScalarInteger() || NewShiftOpcode == ISD::SHL; } -bool AArch64TargetLowering::shouldExpandShift(SelectionDAG &DAG, - SDNode *N) const { +TargetLowering::ShiftLegalizationStrategy +AArch64TargetLowering::preferredShiftLegalizationStrategy( + SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const { if (DAG.getMachineFunction().getFunction().hasMinSize() && !Subtarget->isTargetWindows() && 
!Subtarget->isTargetDarwin()) - return false; - return true; + return ShiftLegalizationStrategy::LowerToLibcall; + return TargetLowering::preferredShiftLegalizationStrategy(DAG, N, + ExpansionFactor); } void AArch64TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const { diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -697,7 +697,9 @@ return HasStandaloneRem; } - bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override; + ShiftLegalizationStrategy + preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, + unsigned ExpansionFactor) const override; CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const; CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -21234,8 +21234,13 @@ : ARM_AM::getSOImmVal(MaskVal)) != -1; } -bool ARMTargetLowering::shouldExpandShift(SelectionDAG &DAG, SDNode *N) const { - return !Subtarget->hasMinSize() || Subtarget->isTargetWindows(); +TargetLowering::ShiftLegalizationStrategy +ARMTargetLowering::preferredShiftLegalizationStrategy( + SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const { + if (Subtarget->hasMinSize() && !Subtarget->isTargetWindows()) + return ShiftLegalizationStrategy::LowerToLibcall; + return TargetLowering::preferredShiftLegalizationStrategy(DAG, N, + ExpansionFactor); } Value *ARMTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -479,11 +479,15 @@ return ISD::SIGN_EXTEND; } - bool shouldExpandShift(SelectionDAG 
&DAG, SDNode *N) const override { + TargetLowering::ShiftLegalizationStrategy + preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, + unsigned ExpansionFactor) const override { if (DAG.getMachineFunction().getFunction().hasMinSize()) - return false; - return true; + return ShiftLegalizationStrategy::LowerToLibcall; + return TargetLowering::preferredShiftLegalizationStrategy(DAG, N, + ExpansionFactor); } + bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override; diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1115,7 +1115,9 @@ return VTIsOk(XVT) && VTIsOk(KeptBitsVT); } - bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override; + ShiftLegalizationStrategy + preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, + unsigned ExpansionFactor) const override; bool shouldSplatInsEltVarIndex(EVT VT) const override; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -6045,12 +6045,14 @@ return true; } -bool X86TargetLowering::shouldExpandShift(SelectionDAG &DAG, - SDNode *N) const { +TargetLowering::ShiftLegalizationStrategy +X86TargetLowering::preferredShiftLegalizationStrategy( + SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const { if (DAG.getMachineFunction().getFunction().hasMinSize() && !Subtarget.isOSWindows()) - return false; - return true; + return ShiftLegalizationStrategy::LowerToLibcall; + return TargetLowering::preferredShiftLegalizationStrategy(DAG, N, + ExpansionFactor); } bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const { diff --git a/llvm/test/CodeGen/AArch64/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/AArch64/wide-scalar-shift-legalization.ll --- 
a/llvm/test/CodeGen/AArch64/wide-scalar-shift-legalization.ll +++ b/llvm/test/CodeGen/AArch64/wide-scalar-shift-legalization.ll @@ -179,62 +179,25 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; ALL-LABEL: lshr_32bytes: ; ALL: // %bb.0: +; ALL-NEXT: sub sp, sp, #64 ; ALL-NEXT: ldr x9, [x1] -; ALL-NEXT: mov w8, #128 -; ALL-NEXT: ldp x11, x10, [x0, #8] -; ALL-NEXT: lsl x9, x9, #3 -; ALL-NEXT: ldr x12, [x0] -; ALL-NEXT: sub x8, x8, x9 -; ALL-NEXT: ldr x13, [x0, #24] -; ALL-NEXT: and x17, x8, #0x38 -; ALL-NEXT: mvn w0, w8 -; ALL-NEXT: lsr x14, x10, #1 -; ALL-NEXT: and x15, x9, #0x38 -; ALL-NEXT: mvn w16, w9 -; ALL-NEXT: tst x8, #0x40 -; ALL-NEXT: lsl x3, x13, x17 -; ALL-NEXT: lsr x14, x14, x0 -; ALL-NEXT: lsl x17, x10, x17 -; ALL-NEXT: orr x14, x3, x14 -; ALL-NEXT: lsl x18, x13, #1 -; ALL-NEXT: csel x0, xzr, x17, ne -; ALL-NEXT: csel x14, x17, x14, ne -; ALL-NEXT: lsl x17, x11, #1 -; ALL-NEXT: lsr x8, x10, x15 -; ALL-NEXT: lsl x1, x18, x16 -; ALL-NEXT: lsr x3, x12, x15 -; ALL-NEXT: lsl x16, x17, x16 -; ALL-NEXT: orr x8, x1, x8 -; ALL-NEXT: lsr x1, x13, x15 -; ALL-NEXT: tst x9, #0x40 -; ALL-NEXT: orr x16, x16, x3 -; ALL-NEXT: lsr x15, x11, x15 -; ALL-NEXT: csel x8, x1, x8, ne -; ALL-NEXT: csel x16, x15, x16, ne -; ALL-NEXT: csel x15, xzr, x15, ne -; ALL-NEXT: csel x17, xzr, x1, ne -; ALL-NEXT: subs x1, x9, #128 -; ALL-NEXT: and x3, x1, #0x38 -; ALL-NEXT: mvn w4, w1 -; ALL-NEXT: csel x17, x17, xzr, lo -; ALL-NEXT: tst x1, #0x40 -; ALL-NEXT: orr x16, x16, x0 -; ALL-NEXT: orr x14, x15, x14 -; ALL-NEXT: lsr x10, x10, x3 -; ALL-NEXT: lsl x18, x18, x4 -; ALL-NEXT: orr x10, x18, x10 -; ALL-NEXT: lsr x13, x13, x3 -; ALL-NEXT: csel x10, x13, x10, ne -; ALL-NEXT: csel x13, xzr, x13, ne -; ALL-NEXT: cmp x9, #128 -; ALL-NEXT: csel x10, x16, x10, lo -; ALL-NEXT: csel x8, x8, xzr, lo -; ALL-NEXT: csel x13, x14, x13, lo -; ALL-NEXT: cmp x9, #0 -; ALL-NEXT: csel x9, x12, x10, eq -; ALL-NEXT: csel x10, x11, x13, eq -; ALL-NEXT: stp x8, x17, [x2, #16] 
-; ALL-NEXT: stp x9, x10, [x2] +; ALL-NEXT: mov w8, #32 +; ALL-NEXT: ldp x11, x10, [x0, #16] +; ALL-NEXT: movi v0.2d, #0000000000000000 +; ALL-NEXT: and x9, x9, #0x1fffffffffffffff +; ALL-NEXT: ldr q1, [x0] +; ALL-NEXT: cmp x9, #32 +; ALL-NEXT: csel x8, x9, x8, lo +; ALL-NEXT: mov x9, sp +; ALL-NEXT: add x8, x9, x8 +; ALL-NEXT: stp q0, q0, [sp, #32] +; ALL-NEXT: stp x11, x10, [sp, #16] +; ALL-NEXT: str q1, [sp] +; ALL-NEXT: ldr q0, [x8] +; ALL-NEXT: ldp x9, x8, [x8, #16] +; ALL-NEXT: str q0, [x2] +; ALL-NEXT: stp x9, x8, [x2, #16] +; ALL-NEXT: add sp, sp, #64 ; ALL-NEXT: ret %src = load i256, ptr %src.ptr, align 1 %byteOff = load i256, ptr %byteOff.ptr, align 1 @@ -246,62 +209,26 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; ALL-LABEL: shl_32bytes: ; ALL: // %bb.0: +; ALL-NEXT: sub sp, sp, #64 ; ALL-NEXT: ldr x9, [x1] -; ALL-NEXT: mov w8, #128 -; ALL-NEXT: ldp x10, x11, [x0, #8] -; ALL-NEXT: lsl x9, x9, #3 -; ALL-NEXT: ldr x12, [x0, #24] -; ALL-NEXT: sub x8, x8, x9 -; ALL-NEXT: ldr x13, [x0] -; ALL-NEXT: and x17, x8, #0x38 -; ALL-NEXT: mvn w0, w8 -; ALL-NEXT: lsl x14, x10, #1 -; ALL-NEXT: and x15, x9, #0x38 -; ALL-NEXT: mvn w16, w9 -; ALL-NEXT: tst x8, #0x40 -; ALL-NEXT: lsr x3, x13, x17 -; ALL-NEXT: lsl x14, x14, x0 -; ALL-NEXT: lsr x17, x10, x17 -; ALL-NEXT: orr x14, x14, x3 -; ALL-NEXT: lsr x18, x13, #1 -; ALL-NEXT: csel x0, xzr, x17, ne -; ALL-NEXT: csel x14, x17, x14, ne -; ALL-NEXT: lsr x17, x11, #1 -; ALL-NEXT: lsl x8, x10, x15 -; ALL-NEXT: lsr x1, x18, x16 -; ALL-NEXT: lsl x3, x12, x15 -; ALL-NEXT: lsr x16, x17, x16 -; ALL-NEXT: orr x8, x8, x1 -; ALL-NEXT: lsl x1, x13, x15 -; ALL-NEXT: tst x9, #0x40 -; ALL-NEXT: orr x16, x3, x16 -; ALL-NEXT: lsl x15, x11, x15 -; ALL-NEXT: csel x8, x1, x8, ne -; ALL-NEXT: csel x16, x15, x16, ne -; ALL-NEXT: csel x15, xzr, x15, ne -; ALL-NEXT: csel x17, xzr, x1, ne -; ALL-NEXT: subs x1, x9, #128 -; ALL-NEXT: and x3, x1, #0x38 -; ALL-NEXT: mvn w4, w1 -; ALL-NEXT: csel x17, x17, xzr, lo -; 
ALL-NEXT: tst x1, #0x40 -; ALL-NEXT: orr x16, x16, x0 -; ALL-NEXT: orr x14, x15, x14 -; ALL-NEXT: lsl x10, x10, x3 -; ALL-NEXT: lsr x18, x18, x4 -; ALL-NEXT: orr x10, x10, x18 -; ALL-NEXT: lsl x13, x13, x3 -; ALL-NEXT: csel x10, x13, x10, ne -; ALL-NEXT: csel x13, xzr, x13, ne -; ALL-NEXT: cmp x9, #128 -; ALL-NEXT: csel x10, x16, x10, lo -; ALL-NEXT: csel x8, x8, xzr, lo -; ALL-NEXT: csel x13, x14, x13, lo -; ALL-NEXT: cmp x9, #0 -; ALL-NEXT: csel x9, x12, x10, eq -; ALL-NEXT: csel x10, x11, x13, eq -; ALL-NEXT: stp x17, x8, [x2] -; ALL-NEXT: stp x10, x9, [x2, #16] +; ALL-NEXT: mov w8, #32 +; ALL-NEXT: ldp x11, x10, [x0, #16] +; ALL-NEXT: movi v0.2d, #0000000000000000 +; ALL-NEXT: and x9, x9, #0x1fffffffffffffff +; ALL-NEXT: ldr q1, [x0] +; ALL-NEXT: cmp x9, #32 +; ALL-NEXT: csel x8, x9, x8, lo +; ALL-NEXT: mov x9, sp +; ALL-NEXT: add x9, x9, #32 +; ALL-NEXT: stp q0, q0, [sp] +; ALL-NEXT: sub x8, x9, x8 +; ALL-NEXT: stp x11, x10, [sp, #48] +; ALL-NEXT: str q1, [sp, #32] +; ALL-NEXT: ldr q0, [x8] +; ALL-NEXT: ldp x9, x8, [x8, #16] +; ALL-NEXT: str q0, [x2] +; ALL-NEXT: stp x9, x8, [x2, #16] +; ALL-NEXT: add sp, sp, #64 ; ALL-NEXT: ret %src = load i256, ptr %src.ptr, align 1 %byteOff = load i256, ptr %byteOff.ptr, align 1 @@ -313,63 +240,26 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; ALL-LABEL: ashr_32bytes: ; ALL: // %bb.0: +; ALL-NEXT: sub sp, sp, #64 ; ALL-NEXT: ldr x9, [x1] -; ALL-NEXT: mov w8, #128 -; ALL-NEXT: ldp x11, x10, [x0, #8] -; ALL-NEXT: lsl x9, x9, #3 -; ALL-NEXT: ldr x12, [x0] -; ALL-NEXT: sub x8, x8, x9 -; ALL-NEXT: ldr x13, [x0, #24] -; ALL-NEXT: and x18, x8, #0x38 -; ALL-NEXT: mvn w0, w8 -; ALL-NEXT: lsr x14, x10, #1 -; ALL-NEXT: and x15, x9, #0x38 -; ALL-NEXT: mvn w16, w9 -; ALL-NEXT: lsl x17, x13, #1 -; ALL-NEXT: lsl x4, x13, x18 -; ALL-NEXT: lsr x14, x14, x0 -; ALL-NEXT: tst x8, #0x40 -; ALL-NEXT: lsl x18, x10, x18 -; ALL-NEXT: orr x14, x4, x14 -; ALL-NEXT: lsr x8, x10, x15 -; ALL-NEXT: lsl x1, x17, x16 -; 
ALL-NEXT: csel x0, xzr, x18, ne -; ALL-NEXT: csel x14, x18, x14, ne -; ALL-NEXT: lsl x18, x11, #1 -; ALL-NEXT: orr x8, x1, x8 -; ALL-NEXT: lsr x1, x12, x15 -; ALL-NEXT: lsl x16, x18, x16 -; ALL-NEXT: asr x3, x13, x15 -; ALL-NEXT: tst x9, #0x40 -; ALL-NEXT: orr x16, x16, x1 -; ALL-NEXT: lsr x15, x11, x15 -; ALL-NEXT: asr x18, x13, #63 -; ALL-NEXT: csel x8, x3, x8, ne -; ALL-NEXT: csel x16, x15, x16, ne -; ALL-NEXT: csel x15, xzr, x15, ne -; ALL-NEXT: csel x1, x18, x3, ne -; ALL-NEXT: subs x3, x9, #128 -; ALL-NEXT: orr x16, x16, x0 -; ALL-NEXT: and x4, x3, #0x38 -; ALL-NEXT: mvn w5, w3 -; ALL-NEXT: orr x14, x15, x14 -; ALL-NEXT: lsr x10, x10, x4 -; ALL-NEXT: lsl x17, x17, x5 -; ALL-NEXT: orr x10, x17, x10 -; ALL-NEXT: csel x17, x1, x18, lo -; ALL-NEXT: asr x13, x13, x4 -; ALL-NEXT: tst x3, #0x40 -; ALL-NEXT: csel x10, x13, x10, ne -; ALL-NEXT: csel x13, x18, x13, ne -; ALL-NEXT: cmp x9, #128 -; ALL-NEXT: csel x10, x16, x10, lo -; ALL-NEXT: csel x8, x8, x18, lo -; ALL-NEXT: csel x13, x14, x13, lo -; ALL-NEXT: cmp x9, #0 -; ALL-NEXT: csel x9, x12, x10, eq -; ALL-NEXT: csel x10, x11, x13, eq -; ALL-NEXT: stp x8, x17, [x2, #16] -; ALL-NEXT: stp x9, x10, [x2] +; ALL-NEXT: mov w8, #32 +; ALL-NEXT: ldp x11, x10, [x0, #16] +; ALL-NEXT: and x9, x9, #0x1fffffffffffffff +; ALL-NEXT: ldr q0, [x0] +; ALL-NEXT: cmp x9, #32 +; ALL-NEXT: csel x8, x9, x8, lo +; ALL-NEXT: mov x9, sp +; ALL-NEXT: asr x12, x10, #63 +; ALL-NEXT: add x8, x9, x8 +; ALL-NEXT: stp x11, x10, [sp, #16] +; ALL-NEXT: str q0, [sp] +; ALL-NEXT: stp x12, x12, [sp, #48] +; ALL-NEXT: stp x12, x12, [sp, #32] +; ALL-NEXT: ldr q0, [x8] +; ALL-NEXT: ldp x9, x8, [x8, #16] +; ALL-NEXT: str q0, [x2] +; ALL-NEXT: stp x9, x8, [x2, #16] +; ALL-NEXT: add sp, sp, #64 ; ALL-NEXT: ret %src = load i256, ptr %src.ptr, align 1 %byteOff = load i256, ptr %byteOff.ptr, align 1 diff --git a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll --- 
a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll +++ b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll @@ -226,93 +226,40 @@ ; ; LE-32BIT-LABEL: lshr_16bytes: ; LE-32BIT: # %bb.0: -; LE-32BIT-NEXT: stwu 1, -32(1) +; LE-32BIT-NEXT: stwu 1, -48(1) ; LE-32BIT-NEXT: lwz 4, 12(4) -; LE-32BIT-NEXT: li 8, 0 -; LE-32BIT-NEXT: lwz 6, 8(3) -; LE-32BIT-NEXT: lwz 7, 12(3) -; LE-32BIT-NEXT: rlwinm. 4, 4, 3, 0, 28 -; LE-32BIT-NEXT: lwz 9, 4(3) -; LE-32BIT-NEXT: subfic 10, 4, 96 -; LE-32BIT-NEXT: lwz 3, 0(3) -; LE-32BIT-NEXT: addi 11, 4, -64 -; LE-32BIT-NEXT: stw 27, 12(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: cmplwi 1, 4, 64 -; LE-32BIT-NEXT: stw 28, 16(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: srw 28, 3, 4 -; LE-32BIT-NEXT: stw 30, 24(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: subfic 30, 4, 32 -; LE-32BIT-NEXT: slw 10, 3, 10 -; LE-32BIT-NEXT: srw 27, 9, 11 -; LE-32BIT-NEXT: addi 12, 4, -96 -; LE-32BIT-NEXT: srw 0, 7, 4 -; LE-32BIT-NEXT: or 10, 27, 10 -; LE-32BIT-NEXT: slw 27, 6, 30 -; LE-32BIT-NEXT: bc 12, 4, .LBB6_2 -; LE-32BIT-NEXT: # %bb.1: -; LE-32BIT-NEXT: ori 28, 8, 0 +; LE-32BIT-NEXT: li 6, 0 +; LE-32BIT-NEXT: lwz 7, 4(3) +; LE-32BIT-NEXT: lwz 8, 0(3) +; LE-32BIT-NEXT: clrlwi 4, 4, 3 +; LE-32BIT-NEXT: lwz 9, 12(3) +; LE-32BIT-NEXT: cmplwi 4, 16 +; LE-32BIT-NEXT: lwz 3, 8(3) +; LE-32BIT-NEXT: stw 6, 28(1) +; LE-32BIT-NEXT: stw 3, 40(1) +; LE-32BIT-NEXT: li 3, 16 +; LE-32BIT-NEXT: stw 6, 24(1) +; LE-32BIT-NEXT: bc 12, 0, .LBB6_1 ; LE-32BIT-NEXT: b .LBB6_2 +; LE-32BIT-NEXT: .LBB6_1: +; LE-32BIT-NEXT: addi 3, 4, 0 ; LE-32BIT-NEXT: .LBB6_2: -; LE-32BIT-NEXT: stw 29, 20(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: srw 29, 9, 4 -; LE-32BIT-NEXT: or 0, 0, 27 -; LE-32BIT-NEXT: slw 27, 3, 30 -; LE-32BIT-NEXT: stw 28, 0(5) -; LE-32BIT-NEXT: subfic 28, 4, 64 -; LE-32BIT-NEXT: srw 12, 3, 12 -; LE-32BIT-NEXT: or 29, 29, 27 -; LE-32BIT-NEXT: addi 27, 4, -32 -; LE-32BIT-NEXT: or 10, 10, 12 -; LE-32BIT-NEXT: subfic 12, 28, 32 -; LE-32BIT-NEXT: slw 30, 9, 30 -; 
LE-32BIT-NEXT: srw 12, 9, 12 -; LE-32BIT-NEXT: slw 9, 9, 28 -; LE-32BIT-NEXT: slw 28, 3, 28 -; LE-32BIT-NEXT: srw 11, 3, 11 -; LE-32BIT-NEXT: srw 3, 3, 27 -; LE-32BIT-NEXT: srw 27, 6, 27 -; LE-32BIT-NEXT: or 0, 0, 27 -; LE-32BIT-NEXT: or 12, 28, 12 -; LE-32BIT-NEXT: srw 4, 6, 4 -; LE-32BIT-NEXT: or 3, 29, 3 -; LE-32BIT-NEXT: or 9, 0, 9 -; LE-32BIT-NEXT: or 12, 12, 30 -; LE-32BIT-NEXT: bc 12, 4, .LBB6_4 -; LE-32BIT-NEXT: # %bb.3: -; LE-32BIT-NEXT: ori 3, 8, 0 -; LE-32BIT-NEXT: ori 8, 10, 0 -; LE-32BIT-NEXT: b .LBB6_5 -; LE-32BIT-NEXT: .LBB6_4: -; LE-32BIT-NEXT: addi 8, 9, 0 -; LE-32BIT-NEXT: .LBB6_5: -; LE-32BIT-NEXT: or 4, 4, 12 -; LE-32BIT-NEXT: stw 3, 4(5) -; LE-32BIT-NEXT: bc 12, 2, .LBB6_7 -; LE-32BIT-NEXT: # %bb.6: -; LE-32BIT-NEXT: ori 3, 8, 0 -; LE-32BIT-NEXT: b .LBB6_8 -; LE-32BIT-NEXT: .LBB6_7: -; LE-32BIT-NEXT: addi 3, 7, 0 -; LE-32BIT-NEXT: .LBB6_8: -; LE-32BIT-NEXT: bc 12, 4, .LBB6_10 -; LE-32BIT-NEXT: # %bb.9: -; LE-32BIT-NEXT: ori 4, 11, 0 -; LE-32BIT-NEXT: b .LBB6_10 -; LE-32BIT-NEXT: .LBB6_10: +; LE-32BIT-NEXT: stw 6, 20(1) +; LE-32BIT-NEXT: stw 6, 16(1) +; LE-32BIT-NEXT: addi 6, 1, 32 +; LE-32BIT-NEXT: stw 9, 44(1) +; LE-32BIT-NEXT: sub 3, 6, 3 +; LE-32BIT-NEXT: stw 8, 32(1) +; LE-32BIT-NEXT: stw 7, 36(1) +; LE-32BIT-NEXT: lwz 4, 4(3) +; LE-32BIT-NEXT: lwz 6, 0(3) +; LE-32BIT-NEXT: lwz 7, 8(3) +; LE-32BIT-NEXT: lwz 3, 12(3) +; LE-32BIT-NEXT: stw 7, 8(5) ; LE-32BIT-NEXT: stw 3, 12(5) -; LE-32BIT-NEXT: bc 12, 2, .LBB6_12 -; LE-32BIT-NEXT: # %bb.11: -; LE-32BIT-NEXT: ori 3, 4, 0 -; LE-32BIT-NEXT: b .LBB6_13 -; LE-32BIT-NEXT: .LBB6_12: -; LE-32BIT-NEXT: addi 3, 6, 0 -; LE-32BIT-NEXT: .LBB6_13: -; LE-32BIT-NEXT: stw 3, 8(5) -; LE-32BIT-NEXT: lwz 30, 24(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 29, 20(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 28, 16(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 27, 12(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: addi 1, 1, 32 +; LE-32BIT-NEXT: stw 6, 0(5) +; LE-32BIT-NEXT: stw 4, 4(5) +; LE-32BIT-NEXT: addi 
1, 1, 48 ; LE-32BIT-NEXT: blr %src = load i128, ptr %src.ptr, align 1 %byteOff = load i128, ptr %byteOff.ptr, align 1 @@ -360,93 +307,39 @@ ; ; LE-32BIT-LABEL: shl_16bytes: ; LE-32BIT: # %bb.0: -; LE-32BIT-NEXT: stwu 1, -32(1) +; LE-32BIT-NEXT: stwu 1, -48(1) ; LE-32BIT-NEXT: lwz 4, 12(4) -; LE-32BIT-NEXT: li 8, 0 -; LE-32BIT-NEXT: lwz 6, 4(3) -; LE-32BIT-NEXT: lwz 7, 0(3) -; LE-32BIT-NEXT: rlwinm. 4, 4, 3, 0, 28 -; LE-32BIT-NEXT: lwz 9, 8(3) -; LE-32BIT-NEXT: subfic 10, 4, 96 -; LE-32BIT-NEXT: lwz 3, 12(3) -; LE-32BIT-NEXT: addi 11, 4, -64 -; LE-32BIT-NEXT: stw 27, 12(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: cmplwi 1, 4, 64 -; LE-32BIT-NEXT: stw 28, 16(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: slw 28, 3, 4 -; LE-32BIT-NEXT: stw 30, 24(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: subfic 30, 4, 32 -; LE-32BIT-NEXT: srw 10, 3, 10 -; LE-32BIT-NEXT: slw 27, 9, 11 -; LE-32BIT-NEXT: addi 12, 4, -96 -; LE-32BIT-NEXT: slw 0, 7, 4 -; LE-32BIT-NEXT: or 10, 27, 10 -; LE-32BIT-NEXT: srw 27, 6, 30 -; LE-32BIT-NEXT: bc 12, 4, .LBB7_2 -; LE-32BIT-NEXT: # %bb.1: -; LE-32BIT-NEXT: ori 28, 8, 0 +; LE-32BIT-NEXT: li 6, 0 +; LE-32BIT-NEXT: lwz 7, 4(3) +; LE-32BIT-NEXT: lwz 8, 0(3) +; LE-32BIT-NEXT: clrlwi 4, 4, 3 +; LE-32BIT-NEXT: lwz 9, 12(3) +; LE-32BIT-NEXT: cmplwi 4, 16 +; LE-32BIT-NEXT: lwz 3, 8(3) +; LE-32BIT-NEXT: stw 6, 44(1) +; LE-32BIT-NEXT: stw 3, 24(1) +; LE-32BIT-NEXT: li 3, 16 +; LE-32BIT-NEXT: stw 6, 40(1) +; LE-32BIT-NEXT: bc 12, 0, .LBB7_1 ; LE-32BIT-NEXT: b .LBB7_2 +; LE-32BIT-NEXT: .LBB7_1: +; LE-32BIT-NEXT: addi 3, 4, 0 ; LE-32BIT-NEXT: .LBB7_2: -; LE-32BIT-NEXT: stw 29, 20(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: slw 29, 9, 4 -; LE-32BIT-NEXT: or 0, 0, 27 -; LE-32BIT-NEXT: srw 27, 3, 30 -; LE-32BIT-NEXT: stw 28, 12(5) -; LE-32BIT-NEXT: subfic 28, 4, 64 -; LE-32BIT-NEXT: slw 12, 3, 12 -; LE-32BIT-NEXT: or 29, 29, 27 -; LE-32BIT-NEXT: addi 27, 4, -32 -; LE-32BIT-NEXT: or 10, 10, 12 -; LE-32BIT-NEXT: subfic 12, 28, 32 -; LE-32BIT-NEXT: srw 30, 9, 30 -; 
LE-32BIT-NEXT: slw 12, 9, 12 -; LE-32BIT-NEXT: srw 9, 9, 28 -; LE-32BIT-NEXT: srw 28, 3, 28 -; LE-32BIT-NEXT: slw 11, 3, 11 -; LE-32BIT-NEXT: slw 3, 3, 27 -; LE-32BIT-NEXT: slw 27, 6, 27 -; LE-32BIT-NEXT: or 0, 0, 27 -; LE-32BIT-NEXT: or 12, 28, 12 -; LE-32BIT-NEXT: slw 4, 6, 4 -; LE-32BIT-NEXT: or 3, 29, 3 -; LE-32BIT-NEXT: or 9, 0, 9 -; LE-32BIT-NEXT: or 12, 12, 30 -; LE-32BIT-NEXT: bc 12, 4, .LBB7_4 -; LE-32BIT-NEXT: # %bb.3: -; LE-32BIT-NEXT: ori 3, 8, 0 -; LE-32BIT-NEXT: ori 8, 10, 0 -; LE-32BIT-NEXT: b .LBB7_5 -; LE-32BIT-NEXT: .LBB7_4: -; LE-32BIT-NEXT: addi 8, 9, 0 -; LE-32BIT-NEXT: .LBB7_5: -; LE-32BIT-NEXT: or 4, 4, 12 +; LE-32BIT-NEXT: stw 6, 36(1) +; LE-32BIT-NEXT: stw 6, 32(1) +; LE-32BIT-NEXT: addi 6, 1, 16 +; LE-32BIT-NEXT: stw 9, 28(1) +; LE-32BIT-NEXT: stw 8, 16(1) +; LE-32BIT-NEXT: stw 7, 20(1) +; LE-32BIT-NEXT: lwzux 4, 3, 6 +; LE-32BIT-NEXT: lwz 6, 4(3) +; LE-32BIT-NEXT: lwz 7, 12(3) +; LE-32BIT-NEXT: lwz 3, 8(3) +; LE-32BIT-NEXT: stw 4, 0(5) ; LE-32BIT-NEXT: stw 3, 8(5) -; LE-32BIT-NEXT: bc 12, 2, .LBB7_7 -; LE-32BIT-NEXT: # %bb.6: -; LE-32BIT-NEXT: ori 3, 8, 0 -; LE-32BIT-NEXT: b .LBB7_8 -; LE-32BIT-NEXT: .LBB7_7: -; LE-32BIT-NEXT: addi 3, 7, 0 -; LE-32BIT-NEXT: .LBB7_8: -; LE-32BIT-NEXT: bc 12, 4, .LBB7_10 -; LE-32BIT-NEXT: # %bb.9: -; LE-32BIT-NEXT: ori 4, 11, 0 -; LE-32BIT-NEXT: b .LBB7_10 -; LE-32BIT-NEXT: .LBB7_10: -; LE-32BIT-NEXT: stw 3, 0(5) -; LE-32BIT-NEXT: bc 12, 2, .LBB7_12 -; LE-32BIT-NEXT: # %bb.11: -; LE-32BIT-NEXT: ori 3, 4, 0 -; LE-32BIT-NEXT: b .LBB7_13 -; LE-32BIT-NEXT: .LBB7_12: -; LE-32BIT-NEXT: addi 3, 6, 0 -; LE-32BIT-NEXT: .LBB7_13: -; LE-32BIT-NEXT: stw 3, 4(5) -; LE-32BIT-NEXT: lwz 30, 24(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 29, 20(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 28, 16(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 27, 12(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: addi 1, 1, 32 +; LE-32BIT-NEXT: stw 7, 12(5) +; LE-32BIT-NEXT: stw 6, 4(5) +; LE-32BIT-NEXT: addi 1, 1, 48 ; LE-32BIT-NEXT: 
blr %src = load i128, ptr %src.ptr, align 1 %byteOff = load i128, ptr %byteOff.ptr, align 1 @@ -500,95 +393,42 @@ ; ; LE-32BIT-LABEL: ashr_16bytes: ; LE-32BIT: # %bb.0: -; LE-32BIT-NEXT: stwu 1, -32(1) +; LE-32BIT-NEXT: stwu 1, -48(1) ; LE-32BIT-NEXT: lwz 4, 12(4) +; LE-32BIT-NEXT: li 6, 16 +; LE-32BIT-NEXT: lwz 7, 4(3) ; LE-32BIT-NEXT: lwz 8, 0(3) -; LE-32BIT-NEXT: lwz 9, 4(3) -; LE-32BIT-NEXT: lwz 6, 8(3) -; LE-32BIT-NEXT: lwz 7, 12(3) -; LE-32BIT-NEXT: rlwinm. 3, 4, 3, 0, 28 -; LE-32BIT-NEXT: subfic 10, 3, 96 -; LE-32BIT-NEXT: addi 11, 3, -64 -; LE-32BIT-NEXT: stw 27, 12(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: addi 12, 3, -96 -; LE-32BIT-NEXT: stw 30, 24(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: subfic 30, 3, 32 -; LE-32BIT-NEXT: slw 10, 8, 10 -; LE-32BIT-NEXT: srw 27, 9, 11 -; LE-32BIT-NEXT: stw 26, 8(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: sraw 26, 8, 12 -; LE-32BIT-NEXT: stw 28, 16(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: srw 28, 9, 3 -; LE-32BIT-NEXT: stw 29, 20(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: addi 29, 3, -32 -; LE-32BIT-NEXT: cmpwi 1, 12, 1 -; LE-32BIT-NEXT: slw 12, 8, 30 -; LE-32BIT-NEXT: or 10, 27, 10 -; LE-32BIT-NEXT: srw 0, 7, 3 -; LE-32BIT-NEXT: sraw 27, 8, 29 -; LE-32BIT-NEXT: bc 12, 4, .LBB8_2 +; LE-32BIT-NEXT: clrlwi 4, 4, 3 +; LE-32BIT-NEXT: lwz 9, 12(3) +; LE-32BIT-NEXT: cmplwi 4, 16 +; LE-32BIT-NEXT: lwz 3, 8(3) +; LE-32BIT-NEXT: stw 7, 36(1) +; LE-32BIT-NEXT: addi 7, 1, 32 +; LE-32BIT-NEXT: stw 3, 40(1) +; LE-32BIT-NEXT: srawi 3, 8, 31 +; LE-32BIT-NEXT: stw 3, 28(1) +; LE-32BIT-NEXT: stw 3, 24(1) +; LE-32BIT-NEXT: stw 3, 20(1) +; LE-32BIT-NEXT: stw 3, 16(1) +; LE-32BIT-NEXT: bc 12, 0, .LBB8_2 ; LE-32BIT-NEXT: # %bb.1: -; LE-32BIT-NEXT: ori 10, 26, 0 -; LE-32BIT-NEXT: b .LBB8_2 +; LE-32BIT-NEXT: ori 3, 6, 0 +; LE-32BIT-NEXT: b .LBB8_3 ; LE-32BIT-NEXT: .LBB8_2: -; LE-32BIT-NEXT: cmpwi 1, 29, 1 -; LE-32BIT-NEXT: or 12, 28, 12 -; LE-32BIT-NEXT: subfic 28, 3, 64 -; LE-32BIT-NEXT: slw 26, 6, 30 -; LE-32BIT-NEXT: srawi 4, 8, 31 -; 
LE-32BIT-NEXT: bc 12, 4, .LBB8_4 -; LE-32BIT-NEXT: # %bb.3: -; LE-32BIT-NEXT: ori 12, 27, 0 -; LE-32BIT-NEXT: b .LBB8_4 -; LE-32BIT-NEXT: .LBB8_4: -; LE-32BIT-NEXT: sraw 27, 8, 3 -; LE-32BIT-NEXT: or 0, 0, 26 -; LE-32BIT-NEXT: slw 26, 9, 28 -; LE-32BIT-NEXT: sraw 11, 8, 11 -; LE-32BIT-NEXT: slw 8, 8, 28 -; LE-32BIT-NEXT: subfic 28, 28, 32 -; LE-32BIT-NEXT: slw 30, 9, 30 -; LE-32BIT-NEXT: srw 9, 9, 28 -; LE-32BIT-NEXT: srw 29, 6, 29 -; LE-32BIT-NEXT: or 8, 8, 9 -; LE-32BIT-NEXT: cmplwi 1, 3, 64 -; LE-32BIT-NEXT: or 0, 0, 29 -; LE-32BIT-NEXT: srw 3, 6, 3 -; LE-32BIT-NEXT: or 8, 8, 30 -; LE-32BIT-NEXT: or 9, 0, 26 -; LE-32BIT-NEXT: or 3, 3, 8 -; LE-32BIT-NEXT: bc 12, 4, .LBB8_6 -; LE-32BIT-NEXT: # %bb.5: -; LE-32BIT-NEXT: ori 28, 4, 0 -; LE-32BIT-NEXT: ori 9, 10, 0 -; LE-32BIT-NEXT: ori 3, 11, 0 -; LE-32BIT-NEXT: b .LBB8_7 -; LE-32BIT-NEXT: .LBB8_6: -; LE-32BIT-NEXT: addi 28, 27, 0 -; LE-32BIT-NEXT: addi 4, 12, 0 -; LE-32BIT-NEXT: .LBB8_7: -; LE-32BIT-NEXT: lwz 30, 24(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: bc 12, 2, .LBB8_8 -; LE-32BIT-NEXT: b .LBB8_9 -; LE-32BIT-NEXT: .LBB8_8: -; LE-32BIT-NEXT: addi 3, 6, 0 -; LE-32BIT-NEXT: .LBB8_9: +; LE-32BIT-NEXT: addi 3, 4, 0 +; LE-32BIT-NEXT: .LBB8_3: +; LE-32BIT-NEXT: stw 9, 44(1) +; LE-32BIT-NEXT: sub 3, 7, 3 +; LE-32BIT-NEXT: stw 8, 32(1) +; LE-32BIT-NEXT: lwz 4, 4(3) +; LE-32BIT-NEXT: lwz 6, 0(3) +; LE-32BIT-NEXT: lwz 7, 8(3) +; LE-32BIT-NEXT: lwz 3, 12(3) +; LE-32BIT-NEXT: stw 7, 8(5) +; LE-32BIT-NEXT: stw 3, 12(5) +; LE-32BIT-NEXT: stw 6, 0(5) ; LE-32BIT-NEXT: stw 4, 4(5) -; LE-32BIT-NEXT: bc 12, 2, .LBB8_11 -; LE-32BIT-NEXT: # %bb.10: -; LE-32BIT-NEXT: ori 4, 9, 0 -; LE-32BIT-NEXT: b .LBB8_12 -; LE-32BIT-NEXT: .LBB8_11: -; LE-32BIT-NEXT: addi 4, 7, 0 -; LE-32BIT-NEXT: .LBB8_12: -; LE-32BIT-NEXT: stw 28, 0(5) -; LE-32BIT-NEXT: stw 4, 12(5) -; LE-32BIT-NEXT: stw 3, 8(5) -; LE-32BIT-NEXT: lwz 29, 20(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 28, 16(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 27, 12(1) # 4-byte 
Folded Reload -; LE-32BIT-NEXT: lwz 26, 8(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: addi 1, 1, 32 +; LE-32BIT-NEXT: addi 1, 1, 48 ; LE-32BIT-NEXT: blr %src = load i128, ptr %src.ptr, align 1 %byteOff = load i128, ptr %byteOff.ptr, align 1 @@ -602,582 +442,122 @@ ; LE-64BIT-LABEL: lshr_32bytes: ; LE-64BIT: # %bb.0: ; LE-64BIT-NEXT: lwz 4, 0(4) -; LE-64BIT-NEXT: ld 7, 0(3) -; LE-64BIT-NEXT: ld 8, 8(3) -; LE-64BIT-NEXT: ld 9, 16(3) -; LE-64BIT-NEXT: li 6, 0 -; LE-64BIT-NEXT: ld 3, 24(3) -; LE-64BIT-NEXT: std 28, -32(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: std 21, -88(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: std 24, -64(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: rlwinm. 4, 4, 3, 0, 28 -; LE-64BIT-NEXT: std 25, -56(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: subfic 28, 4, 64 -; LE-64BIT-NEXT: subfic 11, 4, 192 -; LE-64BIT-NEXT: std 27, -40(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: std 29, -24(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: addi 0, 4, -128 -; LE-64BIT-NEXT: srd 29, 9, 4 -; LE-64BIT-NEXT: addi 27, 4, -64 -; LE-64BIT-NEXT: subfic 25, 4, 128 -; LE-64BIT-NEXT: sld 24, 8, 28 -; LE-64BIT-NEXT: std 22, -80(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: std 26, -48(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: sld 21, 9, 28 -; LE-64BIT-NEXT: sld 28, 3, 28 -; LE-64BIT-NEXT: std 30, -16(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: srd 10, 7, 4 -; LE-64BIT-NEXT: addi 30, 4, -192 -; LE-64BIT-NEXT: subfic 22, 25, 64 -; LE-64BIT-NEXT: sld 11, 3, 11 -; LE-64BIT-NEXT: srd 26, 9, 0 -; LE-64BIT-NEXT: or 29, 29, 28 -; LE-64BIT-NEXT: std 23, -72(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: or 10, 10, 24 -; LE-64BIT-NEXT: srd 28, 3, 27 -; LE-64BIT-NEXT: srd 30, 3, 30 -; LE-64BIT-NEXT: or 11, 26, 11 -; LE-64BIT-NEXT: ld 26, -48(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: srd 23, 8, 27 -; LE-64BIT-NEXT: srd 27, 9, 22 -; LE-64BIT-NEXT: or 29, 29, 28 -; LE-64BIT-NEXT: or 11, 11, 30 -; LE-64BIT-NEXT: ld 24, -64(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: sld 28, 3, 25 -; LE-64BIT-NEXT: or 
10, 10, 23 -; LE-64BIT-NEXT: ld 23, -72(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: ld 22, -80(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: sld 9, 9, 25 -; LE-64BIT-NEXT: or 30, 28, 27 -; LE-64BIT-NEXT: ld 28, -32(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: ld 27, -40(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: ld 25, -56(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: cmplwi 1, 4, 128 -; LE-64BIT-NEXT: srd 12, 8, 4 -; LE-64BIT-NEXT: or 9, 10, 9 -; LE-64BIT-NEXT: or 30, 30, 21 -; LE-64BIT-NEXT: ld 21, -88(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: srd 10, 3, 0 -; LE-64BIT-NEXT: isel 9, 9, 11, 4 -; LE-64BIT-NEXT: or 11, 12, 30 -; LE-64BIT-NEXT: ld 30, -16(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: iseleq 7, 7, 9 -; LE-64BIT-NEXT: srd 3, 3, 4 -; LE-64BIT-NEXT: isel 9, 11, 10, 4 -; LE-64BIT-NEXT: std 7, 0(5) -; LE-64BIT-NEXT: isel 0, 29, 6, 4 -; LE-64BIT-NEXT: ld 29, -24(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: iseleq 4, 8, 9 -; LE-64BIT-NEXT: std 0, 16(5) -; LE-64BIT-NEXT: isel 3, 3, 6, 4 -; LE-64BIT-NEXT: std 4, 8(5) -; LE-64BIT-NEXT: std 3, 24(5) +; LE-64BIT-NEXT: li 6, 16 +; LE-64BIT-NEXT: lxvd2x 1, 0, 3 +; LE-64BIT-NEXT: xxlxor 2, 2, 2 +; LE-64BIT-NEXT: li 9, 32 +; LE-64BIT-NEXT: addi 7, 1, -64 +; LE-64BIT-NEXT: lxvd2x 0, 3, 6 +; LE-64BIT-NEXT: li 8, 32 +; LE-64BIT-NEXT: clrlwi 3, 4, 3 +; LE-64BIT-NEXT: li 4, 48 +; LE-64BIT-NEXT: cmplwi 3, 32 +; LE-64BIT-NEXT: stxvd2x 2, 7, 4 +; LE-64BIT-NEXT: stxvd2x 2, 7, 8 +; LE-64BIT-NEXT: isellt 3, 3, 9 +; LE-64BIT-NEXT: stxvd2x 0, 7, 6 +; LE-64BIT-NEXT: stxvd2x 1, 0, 7 +; LE-64BIT-NEXT: add 4, 7, 3 +; LE-64BIT-NEXT: lxvd2x 0, 7, 3 +; LE-64BIT-NEXT: lxvd2x 1, 4, 6 +; LE-64BIT-NEXT: stxvd2x 1, 5, 6 +; LE-64BIT-NEXT: stxvd2x 0, 0, 5 ; LE-64BIT-NEXT: blr ; ; BE-LABEL: lshr_32bytes: ; BE: # %bb.0: ; BE-NEXT: lwz 4, 28(4) -; BE-NEXT: ld 7, 16(3) -; BE-NEXT: ld 8, 24(3) -; BE-NEXT: ld 9, 8(3) -; BE-NEXT: ld 3, 0(3) -; BE-NEXT: std 27, -40(1) # 8-byte Folded Spill -; BE-NEXT: std 30, -16(1) # 8-byte Folded Spill -; BE-NEXT: std 28, 
-32(1) # 8-byte Folded Spill -; BE-NEXT: std 29, -24(1) # 8-byte Folded Spill -; BE-NEXT: li 6, 0 -; BE-NEXT: rlwinm. 4, 4, 3, 0, 28 -; BE-NEXT: subfic 10, 4, 192 -; BE-NEXT: addi 11, 4, -128 -; BE-NEXT: addi 12, 4, -192 -; BE-NEXT: subfic 30, 4, 64 -; BE-NEXT: sld 10, 3, 10 -; BE-NEXT: srd 27, 9, 11 -; BE-NEXT: srd 0, 8, 4 -; BE-NEXT: addi 29, 4, -64 -; BE-NEXT: subfic 28, 4, 128 -; BE-NEXT: srd 12, 3, 12 -; BE-NEXT: or 10, 27, 10 -; BE-NEXT: sld 27, 7, 30 -; BE-NEXT: or 10, 10, 12 -; BE-NEXT: or 0, 0, 27 -; BE-NEXT: srd 27, 7, 29 -; BE-NEXT: subfic 12, 28, 64 -; BE-NEXT: or 0, 0, 27 -; BE-NEXT: sld 27, 3, 28 -; BE-NEXT: srd 12, 9, 12 -; BE-NEXT: sld 28, 9, 28 -; BE-NEXT: cmplwi 1, 4, 128 -; BE-NEXT: or 12, 27, 12 -; BE-NEXT: or 28, 0, 28 -; BE-NEXT: sld 0, 9, 30 -; BE-NEXT: srd 9, 9, 4 -; BE-NEXT: srd 11, 3, 11 -; BE-NEXT: bc 12, 4, .LBB9_1 +; BE-NEXT: ld 7, 8(3) +; BE-NEXT: ld 8, 0(3) +; BE-NEXT: ld 9, 24(3) +; BE-NEXT: ld 3, 16(3) +; BE-NEXT: li 6, 32 +; BE-NEXT: addi 10, 1, -64 +; BE-NEXT: clrlwi 4, 4, 3 +; BE-NEXT: cmplwi 4, 32 +; BE-NEXT: bc 12, 0, .LBB9_2 +; BE-NEXT: # %bb.1: +; BE-NEXT: ori 4, 6, 0 ; BE-NEXT: b .LBB9_2 -; BE-NEXT: .LBB9_1: -; BE-NEXT: addi 10, 28, 0 ; BE-NEXT: .LBB9_2: -; BE-NEXT: ld 28, -32(1) # 8-byte Folded Reload -; BE-NEXT: ld 27, -40(1) # 8-byte Folded Reload -; BE-NEXT: or 12, 12, 0 -; BE-NEXT: srd 0, 7, 4 -; BE-NEXT: or 12, 0, 12 -; BE-NEXT: sld 0, 3, 30 -; BE-NEXT: srd 30, 3, 29 -; BE-NEXT: bc 12, 4, .LBB9_3 -; BE-NEXT: b .LBB9_4 -; BE-NEXT: .LBB9_3: -; BE-NEXT: addi 11, 12, 0 -; BE-NEXT: .LBB9_4: -; BE-NEXT: srd 3, 3, 4 -; BE-NEXT: bc 12, 2, .LBB9_6 -; BE-NEXT: # %bb.5: -; BE-NEXT: ori 4, 10, 0 -; BE-NEXT: b .LBB9_7 -; BE-NEXT: .LBB9_6: -; BE-NEXT: addi 4, 8, 0 -; BE-NEXT: .LBB9_7: -; BE-NEXT: ld 29, -24(1) # 8-byte Folded Reload -; BE-NEXT: or 9, 9, 0 -; BE-NEXT: or 9, 9, 30 -; BE-NEXT: bc 12, 2, .LBB9_9 -; BE-NEXT: # %bb.8: -; BE-NEXT: ori 7, 11, 0 -; BE-NEXT: b .LBB9_9 -; BE-NEXT: .LBB9_9: -; BE-NEXT: bc 12, 4, .LBB9_11 -; 
BE-NEXT: # %bb.10: -; BE-NEXT: ori 8, 6, 0 -; BE-NEXT: ori 3, 6, 0 -; BE-NEXT: b .LBB9_12 -; BE-NEXT: .LBB9_11: -; BE-NEXT: addi 8, 9, 0 -; BE-NEXT: .LBB9_12: -; BE-NEXT: std 4, 24(5) -; BE-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; BE-NEXT: li 6, 0 +; BE-NEXT: neg 4, 4 +; BE-NEXT: std 6, 24(10) +; BE-NEXT: std 6, 16(10) +; BE-NEXT: std 6, 8(10) +; BE-NEXT: std 6, -64(1) +; BE-NEXT: std 3, 48(10) +; BE-NEXT: std 9, 56(10) +; BE-NEXT: std 8, 32(10) +; BE-NEXT: std 7, 40(10) +; BE-NEXT: clrldi 3, 4, 32 +; BE-NEXT: addi 4, 1, -32 +; BE-NEXT: ldux 3, 4, 3 +; BE-NEXT: ld 6, 8(4) +; BE-NEXT: ld 7, 24(4) +; BE-NEXT: ld 4, 16(4) ; BE-NEXT: std 3, 0(5) -; BE-NEXT: std 8, 8(5) -; BE-NEXT: std 7, 16(5) +; BE-NEXT: std 4, 16(5) +; BE-NEXT: std 7, 24(5) +; BE-NEXT: std 6, 8(5) ; BE-NEXT: blr ; ; LE-32BIT-LABEL: lshr_32bytes: ; LE-32BIT: # %bb.0: -; LE-32BIT-NEXT: stwu 1, -144(1) -; LE-32BIT-NEXT: mfcr 12 -; LE-32BIT-NEXT: stw 14, 72(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 15, 76(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 16, 80(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 17, 84(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 18, 88(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 19, 92(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 20, 96(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 21, 100(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 22, 104(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 23, 108(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 24, 112(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 25, 116(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 26, 120(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 27, 124(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 28, 128(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 29, 132(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 30, 136(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 31, 140(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 12, 68(1) -; LE-32BIT-NEXT: lwz 0, 28(4) -; LE-32BIT-NEXT: lwz 11, 4(3) -; 
LE-32BIT-NEXT: lwz 6, 0(3) -; LE-32BIT-NEXT: rlwinm. 30, 0, 3, 0, 28 -; LE-32BIT-NEXT: stw 5, 64(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: subfic 21, 30, 224 -; LE-32BIT-NEXT: lwz 5, 24(3) -; LE-32BIT-NEXT: subfic 4, 30, 160 -; LE-32BIT-NEXT: lwz 7, 28(3) -; LE-32BIT-NEXT: addi 0, 30, -128 -; LE-32BIT-NEXT: lwz 10, 20(3) -; LE-32BIT-NEXT: subfic 28, 30, 96 -; LE-32BIT-NEXT: lwz 8, 16(3) -; LE-32BIT-NEXT: addi 29, 30, -64 -; LE-32BIT-NEXT: lwz 27, 12(3) -; LE-32BIT-NEXT: subfic 12, 30, 32 -; LE-32BIT-NEXT: lwz 9, 8(3) -; LE-32BIT-NEXT: addi 3, 30, -192 -; LE-32BIT-NEXT: slw 21, 6, 21 -; LE-32BIT-NEXT: srw 16, 11, 3 -; LE-32BIT-NEXT: stw 3, 56(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: srw 20, 7, 30 -; LE-32BIT-NEXT: slw 15, 9, 4 -; LE-32BIT-NEXT: srw 14, 27, 0 -; LE-32BIT-NEXT: slw 31, 8, 28 -; LE-32BIT-NEXT: srw 3, 10, 29 -; LE-32BIT-NEXT: or 21, 16, 21 -; LE-32BIT-NEXT: slw 16, 5, 12 -; LE-32BIT-NEXT: srw 19, 10, 30 -; LE-32BIT-NEXT: or 15, 14, 15 -; LE-32BIT-NEXT: slw 14, 8, 12 -; LE-32BIT-NEXT: or 3, 3, 31 -; LE-32BIT-NEXT: slw 31, 6, 4 -; LE-32BIT-NEXT: or 20, 20, 16 -; LE-32BIT-NEXT: srw 16, 11, 0 -; LE-32BIT-NEXT: stw 7, 60(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: addi 26, 30, -224 -; LE-32BIT-NEXT: mr 7, 10 -; LE-32BIT-NEXT: mr 10, 12 -; LE-32BIT-NEXT: or 19, 19, 14 -; LE-32BIT-NEXT: slw 14, 6, 28 -; LE-32BIT-NEXT: or 16, 16, 31 -; LE-32BIT-NEXT: srw 31, 11, 29 -; LE-32BIT-NEXT: addi 23, 30, -160 -; LE-32BIT-NEXT: srw 18, 27, 30 -; LE-32BIT-NEXT: stw 0, 40(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: mr 12, 28 -; LE-32BIT-NEXT: or 14, 31, 14 -; LE-32BIT-NEXT: mr 28, 9 -; LE-32BIT-NEXT: slw 31, 9, 10 -; LE-32BIT-NEXT: srw 0, 6, 26 -; LE-32BIT-NEXT: addi 25, 30, -96 -; LE-32BIT-NEXT: srw 17, 11, 30 -; LE-32BIT-NEXT: stw 4, 36(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: or 18, 18, 31 -; LE-32BIT-NEXT: slw 31, 6, 10 -; LE-32BIT-NEXT: or 4, 21, 0 -; LE-32BIT-NEXT: srw 0, 28, 23 -; LE-32BIT-NEXT: or 17, 17, 31 -; LE-32BIT-NEXT: addi 31, 30, -32 -; LE-32BIT-NEXT: 
or 0, 15, 0 -; LE-32BIT-NEXT: srw 15, 8, 25 -; LE-32BIT-NEXT: or 3, 3, 15 -; LE-32BIT-NEXT: srw 15, 5, 31 -; LE-32BIT-NEXT: or 20, 20, 15 -; LE-32BIT-NEXT: srw 15, 8, 31 -; LE-32BIT-NEXT: stw 3, 24(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: or 3, 19, 15 -; LE-32BIT-NEXT: srw 23, 6, 23 -; LE-32BIT-NEXT: stw 3, 48(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: subfic 15, 30, 64 -; LE-32BIT-NEXT: or 3, 16, 23 -; LE-32BIT-NEXT: stw 3, 44(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: subfic 3, 15, 32 -; LE-32BIT-NEXT: slw 16, 28, 15 -; LE-32BIT-NEXT: srw 22, 27, 3 -; LE-32BIT-NEXT: stw 4, 32(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: or 4, 16, 22 -; LE-32BIT-NEXT: subfic 16, 30, 128 -; LE-32BIT-NEXT: stw 5, 28(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: subfic 5, 16, 32 -; LE-32BIT-NEXT: stw 4, 20(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: slw 4, 6, 16 -; LE-32BIT-NEXT: srw 24, 11, 5 -; LE-32BIT-NEXT: stw 29, 52(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: mr 29, 27 -; LE-32BIT-NEXT: or 22, 4, 24 -; LE-32BIT-NEXT: slw 24, 28, 16 -; LE-32BIT-NEXT: srw 27, 27, 5 -; LE-32BIT-NEXT: or 27, 24, 27 -; LE-32BIT-NEXT: slw 24, 8, 15 -; LE-32BIT-NEXT: srw 26, 7, 3 -; LE-32BIT-NEXT: or 26, 24, 26 -; LE-32BIT-NEXT: subfic 24, 30, 192 -; LE-32BIT-NEXT: mr 9, 10 -; LE-32BIT-NEXT: mr 10, 28 -; LE-32BIT-NEXT: subfic 28, 24, 32 -; LE-32BIT-NEXT: srw 28, 11, 28 -; LE-32BIT-NEXT: slw 19, 6, 24 -; LE-32BIT-NEXT: or 28, 19, 28 -; LE-32BIT-NEXT: srw 19, 6, 25 -; LE-32BIT-NEXT: or 19, 14, 19 -; LE-32BIT-NEXT: srw 14, 10, 31 -; LE-32BIT-NEXT: lwz 4, 64(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 18, 18, 14 -; LE-32BIT-NEXT: srw 3, 11, 3 -; LE-32BIT-NEXT: slw 14, 6, 15 -; LE-32BIT-NEXT: cmplwi 5, 30, 64 -; LE-32BIT-NEXT: cmplwi 1, 30, 128 -; LE-32BIT-NEXT: slw 24, 11, 24 -; LE-32BIT-NEXT: mr 21, 8 -; LE-32BIT-NEXT: or 8, 14, 3 -; LE-32BIT-NEXT: srw 14, 6, 31 -; LE-32BIT-NEXT: crnand 28, 4, 20 -; LE-32BIT-NEXT: srw 31, 6, 30 -; LE-32BIT-NEXT: or 24, 0, 24 -; LE-32BIT-NEXT: slw 0, 7, 15 -; LE-32BIT-NEXT: 
mr 23, 7 -; LE-32BIT-NEXT: or 17, 17, 14 -; LE-32BIT-NEXT: bc 12, 28, .LBB9_2 -; LE-32BIT-NEXT: # %bb.1: -; LE-32BIT-NEXT: ori 14, 31, 0 -; LE-32BIT-NEXT: b .LBB9_3 +; LE-32BIT-NEXT: stwu 1, -80(1) +; LE-32BIT-NEXT: lwz 4, 28(4) +; LE-32BIT-NEXT: li 6, 0 +; LE-32BIT-NEXT: lwz 7, 4(3) +; LE-32BIT-NEXT: lwz 8, 0(3) +; LE-32BIT-NEXT: clrlwi 4, 4, 3 +; LE-32BIT-NEXT: lwz 9, 12(3) +; LE-32BIT-NEXT: cmplwi 4, 32 +; LE-32BIT-NEXT: lwz 10, 8(3) +; LE-32BIT-NEXT: lwz 11, 20(3) +; LE-32BIT-NEXT: lwz 12, 16(3) +; LE-32BIT-NEXT: lwz 0, 28(3) +; LE-32BIT-NEXT: lwz 3, 24(3) +; LE-32BIT-NEXT: stw 6, 44(1) +; LE-32BIT-NEXT: stw 3, 72(1) +; LE-32BIT-NEXT: li 3, 32 +; LE-32BIT-NEXT: stw 6, 40(1) +; LE-32BIT-NEXT: bc 12, 0, .LBB9_1 +; LE-32BIT-NEXT: b .LBB9_2 +; LE-32BIT-NEXT: .LBB9_1: +; LE-32BIT-NEXT: addi 3, 4, 0 ; LE-32BIT-NEXT: .LBB9_2: -; LE-32BIT-NEXT: li 14, 0 -; LE-32BIT-NEXT: .LBB9_3: -; LE-32BIT-NEXT: or 20, 20, 0 -; LE-32BIT-NEXT: subfic 0, 16, 64 -; LE-32BIT-NEXT: lwz 7, 20(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: srw 31, 29, 0 -; LE-32BIT-NEXT: stw 14, 0(4) -; LE-32BIT-NEXT: subfic 14, 0, 32 -; LE-32BIT-NEXT: slw 14, 10, 14 -; LE-32BIT-NEXT: or 14, 31, 14 -; LE-32BIT-NEXT: slw 31, 29, 9 -; LE-32BIT-NEXT: lwz 3, 36(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 7, 7, 31 -; LE-32BIT-NEXT: slw 31, 11, 12 -; LE-32BIT-NEXT: stw 7, 20(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: or 7, 22, 31 -; LE-32BIT-NEXT: slw 31, 29, 12 -; LE-32BIT-NEXT: or 27, 27, 31 -; LE-32BIT-NEXT: slw 31, 23, 9 -; LE-32BIT-NEXT: or 26, 26, 31 -; LE-32BIT-NEXT: slw 31, 11, 3 -; LE-32BIT-NEXT: or 28, 28, 31 -; LE-32BIT-NEXT: slw 31, 11, 15 -; LE-32BIT-NEXT: lwz 22, 28(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 18, 18, 31 -; LE-32BIT-NEXT: lwz 31, 40(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: srw 0, 10, 0 -; LE-32BIT-NEXT: or 7, 7, 0 -; LE-32BIT-NEXT: srw 0, 22, 30 -; LE-32BIT-NEXT: slw 25, 11, 9 -; LE-32BIT-NEXT: or 26, 0, 26 -; LE-32BIT-NEXT: srw 0, 10, 31 -; LE-32BIT-NEXT: or 3, 8, 25 -; 
LE-32BIT-NEXT: or 28, 0, 28 -; LE-32BIT-NEXT: srw 0, 10, 30 -; LE-32BIT-NEXT: srw 5, 10, 5 -; LE-32BIT-NEXT: or 3, 0, 3 -; LE-32BIT-NEXT: bc 12, 28, .LBB9_5 -; LE-32BIT-NEXT: # %bb.4: -; LE-32BIT-NEXT: ori 0, 17, 0 -; LE-32BIT-NEXT: b .LBB9_6 -; LE-32BIT-NEXT: .LBB9_5: -; LE-32BIT-NEXT: li 0, 0 -; LE-32BIT-NEXT: .LBB9_6: -; LE-32BIT-NEXT: lwz 8, 32(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 5, 14, 5 -; LE-32BIT-NEXT: mr 14, 4 -; LE-32BIT-NEXT: stw 0, 4(4) -; LE-32BIT-NEXT: slw 0, 11, 16 -; LE-32BIT-NEXT: lwz 4, 52(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: cmplwi 6, 31, 64 -; LE-32BIT-NEXT: mr 9, 21 -; LE-32BIT-NEXT: or 5, 0, 5 -; LE-32BIT-NEXT: lwz 0, 56(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: bc 12, 24, .LBB9_8 -; LE-32BIT-NEXT: # %bb.7: -; LE-32BIT-NEXT: ori 25, 8, 0 -; LE-32BIT-NEXT: b .LBB9_9 -; LE-32BIT-NEXT: .LBB9_8: -; LE-32BIT-NEXT: addi 25, 24, 0 -; LE-32BIT-NEXT: .LBB9_9: -; LE-32BIT-NEXT: bc 12, 20, .LBB9_11 -; LE-32BIT-NEXT: # %bb.10: -; LE-32BIT-NEXT: ori 24, 19, 0 -; LE-32BIT-NEXT: b .LBB9_12 -; LE-32BIT-NEXT: .LBB9_11: -; LE-32BIT-NEXT: addi 24, 18, 0 -; LE-32BIT-NEXT: .LBB9_12: -; LE-32BIT-NEXT: srw 19, 9, 4 -; LE-32BIT-NEXT: srw 17, 6, 4 -; LE-32BIT-NEXT: lwz 4, 20(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: srw 30, 21, 30 -; LE-32BIT-NEXT: lwz 8, 24(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: slw 21, 29, 16 -; LE-32BIT-NEXT: cmplwi 7, 16, 64 -; LE-32BIT-NEXT: cmplwi 3, 16, 0 -; LE-32BIT-NEXT: li 16, 0 -; LE-32BIT-NEXT: srw 18, 6, 0 -; LE-32BIT-NEXT: bc 12, 28, .LBB9_14 -; LE-32BIT-NEXT: # %bb.13: -; LE-32BIT-NEXT: ori 0, 16, 0 -; LE-32BIT-NEXT: b .LBB9_15 -; LE-32BIT-NEXT: .LBB9_14: -; LE-32BIT-NEXT: addi 0, 21, 0 -; LE-32BIT-NEXT: .LBB9_15: -; LE-32BIT-NEXT: lwz 21, 60(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: bc 12, 28, .LBB9_16 -; LE-32BIT-NEXT: b .LBB9_17 -; LE-32BIT-NEXT: .LBB9_16: -; LE-32BIT-NEXT: addi 4, 7, 0 -; LE-32BIT-NEXT: .LBB9_17: -; LE-32BIT-NEXT: bc 12, 20, .LBB9_18 -; LE-32BIT-NEXT: b .LBB9_19 -; LE-32BIT-NEXT: 
.LBB9_18: -; LE-32BIT-NEXT: addi 8, 20, 0 -; LE-32BIT-NEXT: .LBB9_19: -; LE-32BIT-NEXT: mr 12, 29 -; LE-32BIT-NEXT: lwz 7, 48(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: slw 20, 29, 15 -; LE-32BIT-NEXT: srw 29, 6, 31 -; LE-32BIT-NEXT: bc 12, 2, .LBB9_20 -; LE-32BIT-NEXT: b .LBB9_21 -; LE-32BIT-NEXT: .LBB9_20: -; LE-32BIT-NEXT: addi 8, 21, 0 -; LE-32BIT-NEXT: .LBB9_21: -; LE-32BIT-NEXT: cmplwi 2, 31, 0 -; LE-32BIT-NEXT: bc 12, 20, .LBB9_23 -; LE-32BIT-NEXT: # %bb.22: -; LE-32BIT-NEXT: ori 26, 19, 0 -; LE-32BIT-NEXT: ori 3, 17, 0 -; LE-32BIT-NEXT: b .LBB9_23 -; LE-32BIT-NEXT: .LBB9_23: -; LE-32BIT-NEXT: or 8, 8, 0 -; LE-32BIT-NEXT: bc 12, 20, .LBB9_25 -; LE-32BIT-NEXT: # %bb.24: -; LE-32BIT-NEXT: ori 0, 16, 0 -; LE-32BIT-NEXT: b .LBB9_26 -; LE-32BIT-NEXT: .LBB9_25: -; LE-32BIT-NEXT: addi 0, 30, 0 -; LE-32BIT-NEXT: .LBB9_26: -; LE-32BIT-NEXT: bc 12, 24, .LBB9_28 -; LE-32BIT-NEXT: # %bb.27: -; LE-32BIT-NEXT: ori 30, 16, 0 -; LE-32BIT-NEXT: b .LBB9_29 -; LE-32BIT-NEXT: .LBB9_28: -; LE-32BIT-NEXT: addi 30, 29, 0 -; LE-32BIT-NEXT: .LBB9_29: -; LE-32BIT-NEXT: bc 12, 20, .LBB9_31 -; LE-32BIT-NEXT: # %bb.30: -; LE-32BIT-NEXT: ori 29, 16, 0 -; LE-32BIT-NEXT: b .LBB9_32 -; LE-32BIT-NEXT: .LBB9_31: -; LE-32BIT-NEXT: addi 29, 7, 0 -; LE-32BIT-NEXT: .LBB9_32: -; LE-32BIT-NEXT: lwz 7, 44(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: bc 12, 10, .LBB9_33 -; LE-32BIT-NEXT: b .LBB9_34 -; LE-32BIT-NEXT: .LBB9_33: -; LE-32BIT-NEXT: addi 25, 12, 0 -; LE-32BIT-NEXT: .LBB9_34: -; LE-32BIT-NEXT: bc 12, 14, .LBB9_35 -; LE-32BIT-NEXT: b .LBB9_36 -; LE-32BIT-NEXT: .LBB9_35: -; LE-32BIT-NEXT: addi 4, 6, 0 -; LE-32BIT-NEXT: .LBB9_36: -; LE-32BIT-NEXT: bc 12, 2, .LBB9_38 -; LE-32BIT-NEXT: # %bb.37: -; LE-32BIT-NEXT: ori 6, 26, 0 -; LE-32BIT-NEXT: b .LBB9_39 -; LE-32BIT-NEXT: .LBB9_38: -; LE-32BIT-NEXT: addi 6, 22, 0 -; LE-32BIT-NEXT: .LBB9_39: -; LE-32BIT-NEXT: li 26, 0 -; LE-32BIT-NEXT: bc 12, 2, .LBB9_40 -; LE-32BIT-NEXT: b .LBB9_41 -; LE-32BIT-NEXT: .LBB9_40: -; LE-32BIT-NEXT: addi 3, 10, 0 -; 
LE-32BIT-NEXT: .LBB9_41: -; LE-32BIT-NEXT: bc 12, 28, .LBB9_43 -; LE-32BIT-NEXT: # %bb.42: -; LE-32BIT-NEXT: ori 5, 20, 0 -; LE-32BIT-NEXT: b .LBB9_43 -; LE-32BIT-NEXT: .LBB9_43: -; LE-32BIT-NEXT: bc 12, 4, .LBB9_45 -; LE-32BIT-NEXT: # %bb.44: -; LE-32BIT-NEXT: ori 8, 25, 0 -; LE-32BIT-NEXT: b .LBB9_45 -; LE-32BIT-NEXT: .LBB9_45: -; LE-32BIT-NEXT: bc 12, 24, .LBB9_47 -; LE-32BIT-NEXT: # %bb.46: -; LE-32BIT-NEXT: ori 28, 18, 0 -; LE-32BIT-NEXT: b .LBB9_47 -; LE-32BIT-NEXT: .LBB9_47: -; LE-32BIT-NEXT: bc 12, 28, .LBB9_49 -; LE-32BIT-NEXT: # %bb.48: -; LE-32BIT-NEXT: ori 27, 16, 0 -; LE-32BIT-NEXT: b .LBB9_49 -; LE-32BIT-NEXT: .LBB9_49: -; LE-32BIT-NEXT: bc 12, 2, .LBB9_51 -; LE-32BIT-NEXT: # %bb.50: -; LE-32BIT-NEXT: ori 12, 24, 0 -; LE-32BIT-NEXT: b .LBB9_51 -; LE-32BIT-NEXT: .LBB9_51: -; LE-32BIT-NEXT: bc 12, 4, .LBB9_53 -; LE-32BIT-NEXT: # %bb.52: -; LE-32BIT-NEXT: ori 3, 26, 0 -; LE-32BIT-NEXT: b .LBB9_53 -; LE-32BIT-NEXT: .LBB9_53: -; LE-32BIT-NEXT: bc 12, 14, .LBB9_54 -; LE-32BIT-NEXT: b .LBB9_55 -; LE-32BIT-NEXT: .LBB9_54: -; LE-32BIT-NEXT: addi 5, 11, 0 -; LE-32BIT-NEXT: .LBB9_55: -; LE-32BIT-NEXT: bc 12, 10, .LBB9_56 -; LE-32BIT-NEXT: b .LBB9_57 -; LE-32BIT-NEXT: .LBB9_56: -; LE-32BIT-NEXT: addi 28, 10, 0 -; LE-32BIT-NEXT: .LBB9_57: -; LE-32BIT-NEXT: or 6, 6, 27 -; LE-32BIT-NEXT: stw 3, 8(14) -; LE-32BIT-NEXT: or 3, 0, 4 -; LE-32BIT-NEXT: bc 12, 2, .LBB9_59 -; LE-32BIT-NEXT: # %bb.58: -; LE-32BIT-NEXT: ori 4, 8, 0 -; LE-32BIT-NEXT: b .LBB9_60 -; LE-32BIT-NEXT: .LBB9_59: -; LE-32BIT-NEXT: addi 4, 21, 0 -; LE-32BIT-NEXT: .LBB9_60: -; LE-32BIT-NEXT: bc 12, 24, .LBB9_62 -; LE-32BIT-NEXT: # %bb.61: -; LE-32BIT-NEXT: ori 24, 16, 0 -; LE-32BIT-NEXT: b .LBB9_63 -; LE-32BIT-NEXT: .LBB9_62: -; LE-32BIT-NEXT: addi 24, 7, 0 -; LE-32BIT-NEXT: .LBB9_63: -; LE-32BIT-NEXT: bc 12, 4, .LBB9_65 -; LE-32BIT-NEXT: # %bb.64: -; LE-32BIT-NEXT: ori 3, 30, 0 -; LE-32BIT-NEXT: ori 6, 28, 0 -; LE-32BIT-NEXT: ori 12, 16, 0 -; LE-32BIT-NEXT: b .LBB9_65 -; LE-32BIT-NEXT: .LBB9_65: -; 
LE-32BIT-NEXT: stw 4, 28(14) -; LE-32BIT-NEXT: or 4, 29, 5 -; LE-32BIT-NEXT: bc 12, 4, .LBB9_67 -; LE-32BIT-NEXT: # %bb.66: -; LE-32BIT-NEXT: ori 4, 24, 0 -; LE-32BIT-NEXT: b .LBB9_67 -; LE-32BIT-NEXT: .LBB9_67: -; LE-32BIT-NEXT: bc 12, 2, .LBB9_69 -; LE-32BIT-NEXT: # %bb.68: -; LE-32BIT-NEXT: ori 5, 6, 0 -; LE-32BIT-NEXT: b .LBB9_70 -; LE-32BIT-NEXT: .LBB9_69: -; LE-32BIT-NEXT: addi 3, 9, 0 -; LE-32BIT-NEXT: addi 5, 22, 0 -; LE-32BIT-NEXT: .LBB9_70: -; LE-32BIT-NEXT: stw 12, 12(14) -; LE-32BIT-NEXT: stw 3, 16(14) -; LE-32BIT-NEXT: bc 12, 2, .LBB9_72 -; LE-32BIT-NEXT: # %bb.71: -; LE-32BIT-NEXT: ori 3, 4, 0 -; LE-32BIT-NEXT: b .LBB9_73 -; LE-32BIT-NEXT: .LBB9_72: -; LE-32BIT-NEXT: addi 3, 23, 0 -; LE-32BIT-NEXT: .LBB9_73: -; LE-32BIT-NEXT: stw 5, 24(14) -; LE-32BIT-NEXT: stw 3, 20(14) -; LE-32BIT-NEXT: lwz 12, 68(1) -; LE-32BIT-NEXT: lwz 31, 140(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: mtcrf 32, 12 # cr2 -; LE-32BIT-NEXT: mtcrf 16, 12 # cr3 -; LE-32BIT-NEXT: lwz 30, 136(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 29, 132(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 28, 128(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 27, 124(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 26, 120(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 25, 116(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 24, 112(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 23, 108(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 22, 104(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 21, 100(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 20, 96(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 19, 92(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 18, 88(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 17, 84(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 16, 80(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 15, 76(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 14, 72(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: addi 1, 1, 144 +; LE-32BIT-NEXT: stw 6, 36(1) +; LE-32BIT-NEXT: stw 6, 32(1) 
+; LE-32BIT-NEXT: stw 6, 28(1) +; LE-32BIT-NEXT: stw 6, 24(1) +; LE-32BIT-NEXT: stw 6, 20(1) +; LE-32BIT-NEXT: stw 6, 16(1) +; LE-32BIT-NEXT: addi 6, 1, 48 +; LE-32BIT-NEXT: stw 0, 76(1) +; LE-32BIT-NEXT: sub 3, 6, 3 +; LE-32BIT-NEXT: stw 12, 64(1) +; LE-32BIT-NEXT: stw 11, 68(1) +; LE-32BIT-NEXT: stw 10, 56(1) +; LE-32BIT-NEXT: stw 9, 60(1) +; LE-32BIT-NEXT: stw 8, 48(1) +; LE-32BIT-NEXT: stw 7, 52(1) +; LE-32BIT-NEXT: lwz 4, 4(3) +; LE-32BIT-NEXT: lwz 6, 0(3) +; LE-32BIT-NEXT: lwz 7, 12(3) +; LE-32BIT-NEXT: lwz 8, 8(3) +; LE-32BIT-NEXT: lwz 9, 20(3) +; LE-32BIT-NEXT: lwz 10, 16(3) +; LE-32BIT-NEXT: lwz 11, 24(3) +; LE-32BIT-NEXT: lwz 3, 28(3) +; LE-32BIT-NEXT: stw 11, 24(5) +; LE-32BIT-NEXT: stw 3, 28(5) +; LE-32BIT-NEXT: stw 10, 16(5) +; LE-32BIT-NEXT: stw 9, 20(5) +; LE-32BIT-NEXT: stw 8, 8(5) +; LE-32BIT-NEXT: stw 7, 12(5) +; LE-32BIT-NEXT: stw 6, 0(5) +; LE-32BIT-NEXT: stw 4, 4(5) +; LE-32BIT-NEXT: addi 1, 1, 80 ; LE-32BIT-NEXT: blr %src = load i256, ptr %src.ptr, align 1 %byteOff = load i256, ptr %byteOff.ptr, align 1 @@ -1190,581 +570,123 @@ ; LE-64BIT-LABEL: shl_32bytes: ; LE-64BIT: # %bb.0: ; LE-64BIT-NEXT: lwz 4, 0(4) -; LE-64BIT-NEXT: ld 7, 24(3) -; LE-64BIT-NEXT: ld 8, 16(3) -; LE-64BIT-NEXT: ld 9, 8(3) -; LE-64BIT-NEXT: li 6, 0 -; LE-64BIT-NEXT: ld 3, 0(3) -; LE-64BIT-NEXT: std 28, -32(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: std 21, -88(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: std 24, -64(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: rlwinm. 
4, 4, 3, 0, 28 -; LE-64BIT-NEXT: std 25, -56(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: subfic 28, 4, 64 -; LE-64BIT-NEXT: subfic 11, 4, 192 -; LE-64BIT-NEXT: std 27, -40(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: std 29, -24(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: addi 0, 4, -128 -; LE-64BIT-NEXT: sld 29, 9, 4 -; LE-64BIT-NEXT: addi 27, 4, -64 -; LE-64BIT-NEXT: subfic 25, 4, 128 -; LE-64BIT-NEXT: srd 24, 8, 28 -; LE-64BIT-NEXT: std 22, -80(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: std 26, -48(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: srd 21, 9, 28 -; LE-64BIT-NEXT: srd 28, 3, 28 -; LE-64BIT-NEXT: std 30, -16(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: sld 10, 7, 4 -; LE-64BIT-NEXT: addi 30, 4, -192 -; LE-64BIT-NEXT: subfic 22, 25, 64 -; LE-64BIT-NEXT: srd 11, 3, 11 -; LE-64BIT-NEXT: sld 26, 9, 0 -; LE-64BIT-NEXT: or 29, 29, 28 -; LE-64BIT-NEXT: std 23, -72(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: or 10, 10, 24 -; LE-64BIT-NEXT: sld 28, 3, 27 -; LE-64BIT-NEXT: sld 30, 3, 30 -; LE-64BIT-NEXT: or 11, 26, 11 -; LE-64BIT-NEXT: ld 26, -48(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: sld 23, 8, 27 -; LE-64BIT-NEXT: sld 27, 9, 22 -; LE-64BIT-NEXT: or 29, 29, 28 -; LE-64BIT-NEXT: or 11, 11, 30 -; LE-64BIT-NEXT: ld 24, -64(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: srd 28, 3, 25 -; LE-64BIT-NEXT: or 10, 10, 23 -; LE-64BIT-NEXT: ld 23, -72(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: ld 22, -80(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: srd 9, 9, 25 -; LE-64BIT-NEXT: or 30, 28, 27 -; LE-64BIT-NEXT: ld 28, -32(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: ld 27, -40(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: ld 25, -56(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: cmplwi 1, 4, 128 -; LE-64BIT-NEXT: sld 12, 8, 4 -; LE-64BIT-NEXT: or 9, 10, 9 -; LE-64BIT-NEXT: or 30, 30, 21 -; LE-64BIT-NEXT: ld 21, -88(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: sld 10, 3, 0 -; LE-64BIT-NEXT: isel 9, 9, 11, 4 -; LE-64BIT-NEXT: or 11, 12, 30 -; LE-64BIT-NEXT: ld 30, -16(1) # 8-byte Folded 
Reload -; LE-64BIT-NEXT: iseleq 7, 7, 9 -; LE-64BIT-NEXT: sld 3, 3, 4 -; LE-64BIT-NEXT: isel 9, 11, 10, 4 -; LE-64BIT-NEXT: std 7, 24(5) -; LE-64BIT-NEXT: isel 0, 29, 6, 4 -; LE-64BIT-NEXT: ld 29, -24(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: iseleq 4, 8, 9 -; LE-64BIT-NEXT: std 0, 8(5) -; LE-64BIT-NEXT: isel 3, 3, 6, 4 -; LE-64BIT-NEXT: std 4, 16(5) -; LE-64BIT-NEXT: std 3, 0(5) +; LE-64BIT-NEXT: li 6, 16 +; LE-64BIT-NEXT: li 7, 32 +; LE-64BIT-NEXT: xxlxor 1, 1, 1 +; LE-64BIT-NEXT: lxvd2x 2, 0, 3 +; LE-64BIT-NEXT: addi 8, 1, -32 +; LE-64BIT-NEXT: lxvd2x 0, 3, 6 +; LE-64BIT-NEXT: addi 3, 1, -64 +; LE-64BIT-NEXT: clrlwi 4, 4, 3 +; LE-64BIT-NEXT: stxvd2x 1, 3, 6 +; LE-64BIT-NEXT: cmplwi 4, 32 +; LE-64BIT-NEXT: isellt 4, 4, 7 +; LE-64BIT-NEXT: li 7, 48 +; LE-64BIT-NEXT: neg 4, 4 +; LE-64BIT-NEXT: stxvd2x 0, 3, 7 +; LE-64BIT-NEXT: li 7, 32 +; LE-64BIT-NEXT: clrldi 4, 4, 32 +; LE-64BIT-NEXT: stxvd2x 2, 3, 7 +; LE-64BIT-NEXT: stxvd2x 1, 0, 3 +; LE-64BIT-NEXT: add 3, 8, 4 +; LE-64BIT-NEXT: lxvd2x 0, 8, 4 +; LE-64BIT-NEXT: lxvd2x 1, 3, 6 +; LE-64BIT-NEXT: stxvd2x 1, 5, 6 +; LE-64BIT-NEXT: stxvd2x 0, 0, 5 ; LE-64BIT-NEXT: blr ; ; BE-LABEL: shl_32bytes: ; BE: # %bb.0: ; BE-NEXT: lwz 4, 28(4) -; BE-NEXT: ld 7, 8(3) ; BE-NEXT: ld 8, 0(3) -; BE-NEXT: ld 9, 16(3) -; BE-NEXT: ld 3, 24(3) -; BE-NEXT: std 27, -40(1) # 8-byte Folded Spill -; BE-NEXT: std 30, -16(1) # 8-byte Folded Spill -; BE-NEXT: std 28, -32(1) # 8-byte Folded Spill -; BE-NEXT: std 29, -24(1) # 8-byte Folded Spill -; BE-NEXT: li 6, 0 -; BE-NEXT: rlwinm. 
4, 4, 3, 0, 28 -; BE-NEXT: subfic 10, 4, 192 -; BE-NEXT: addi 11, 4, -128 -; BE-NEXT: addi 12, 4, -192 -; BE-NEXT: subfic 30, 4, 64 -; BE-NEXT: srd 10, 3, 10 -; BE-NEXT: sld 27, 9, 11 -; BE-NEXT: sld 0, 8, 4 -; BE-NEXT: addi 29, 4, -64 -; BE-NEXT: subfic 28, 4, 128 -; BE-NEXT: sld 12, 3, 12 -; BE-NEXT: or 10, 27, 10 -; BE-NEXT: srd 27, 7, 30 -; BE-NEXT: or 10, 10, 12 -; BE-NEXT: or 0, 0, 27 -; BE-NEXT: sld 27, 7, 29 -; BE-NEXT: subfic 12, 28, 64 -; BE-NEXT: or 0, 0, 27 -; BE-NEXT: srd 27, 3, 28 -; BE-NEXT: sld 12, 9, 12 -; BE-NEXT: srd 28, 9, 28 -; BE-NEXT: cmplwi 1, 4, 128 -; BE-NEXT: or 12, 27, 12 -; BE-NEXT: or 28, 0, 28 -; BE-NEXT: srd 0, 9, 30 -; BE-NEXT: sld 9, 9, 4 -; BE-NEXT: sld 11, 3, 11 -; BE-NEXT: bc 12, 4, .LBB10_1 -; BE-NEXT: b .LBB10_2 -; BE-NEXT: .LBB10_1: -; BE-NEXT: addi 10, 28, 0 +; BE-NEXT: ld 9, 8(3) +; BE-NEXT: ld 10, 24(3) +; BE-NEXT: ld 3, 16(3) +; BE-NEXT: addi 6, 1, -64 +; BE-NEXT: li 7, 0 +; BE-NEXT: li 11, 32 +; BE-NEXT: clrlwi 4, 4, 3 +; BE-NEXT: cmplwi 4, 32 +; BE-NEXT: std 7, 56(6) +; BE-NEXT: std 7, 48(6) +; BE-NEXT: std 7, 40(6) +; BE-NEXT: std 7, 32(6) +; BE-NEXT: std 3, 16(6) +; BE-NEXT: std 10, 24(6) +; BE-NEXT: std 9, 8(6) +; BE-NEXT: std 8, -64(1) +; BE-NEXT: bc 12, 0, .LBB10_2 +; BE-NEXT: # %bb.1: +; BE-NEXT: ori 3, 11, 0 +; BE-NEXT: b .LBB10_3 ; BE-NEXT: .LBB10_2: -; BE-NEXT: ld 28, -32(1) # 8-byte Folded Reload -; BE-NEXT: ld 27, -40(1) # 8-byte Folded Reload -; BE-NEXT: or 12, 12, 0 -; BE-NEXT: sld 0, 7, 4 -; BE-NEXT: or 12, 0, 12 -; BE-NEXT: srd 0, 3, 30 -; BE-NEXT: sld 30, 3, 29 -; BE-NEXT: bc 12, 4, .LBB10_3 -; BE-NEXT: b .LBB10_4 +; BE-NEXT: addi 3, 4, 0 ; BE-NEXT: .LBB10_3: -; BE-NEXT: addi 11, 12, 0 -; BE-NEXT: .LBB10_4: -; BE-NEXT: sld 3, 3, 4 -; BE-NEXT: bc 12, 2, .LBB10_6 -; BE-NEXT: # %bb.5: -; BE-NEXT: ori 4, 10, 0 -; BE-NEXT: b .LBB10_7 -; BE-NEXT: .LBB10_6: -; BE-NEXT: addi 4, 8, 0 -; BE-NEXT: .LBB10_7: -; BE-NEXT: ld 29, -24(1) # 8-byte Folded Reload -; BE-NEXT: or 9, 9, 0 -; BE-NEXT: or 9, 9, 30 -; BE-NEXT: 
bc 12, 2, .LBB10_9 -; BE-NEXT: # %bb.8: -; BE-NEXT: ori 7, 11, 0 -; BE-NEXT: b .LBB10_9 -; BE-NEXT: .LBB10_9: -; BE-NEXT: bc 12, 4, .LBB10_11 -; BE-NEXT: # %bb.10: -; BE-NEXT: ori 8, 6, 0 -; BE-NEXT: ori 3, 6, 0 -; BE-NEXT: b .LBB10_12 -; BE-NEXT: .LBB10_11: -; BE-NEXT: addi 8, 9, 0 -; BE-NEXT: .LBB10_12: +; BE-NEXT: ldux 4, 3, 6 +; BE-NEXT: ld 6, 8(3) +; BE-NEXT: ld 7, 24(3) +; BE-NEXT: ld 3, 16(3) ; BE-NEXT: std 4, 0(5) -; BE-NEXT: ld 30, -16(1) # 8-byte Folded Reload -; BE-NEXT: std 3, 24(5) -; BE-NEXT: std 8, 16(5) -; BE-NEXT: std 7, 8(5) +; BE-NEXT: std 3, 16(5) +; BE-NEXT: std 7, 24(5) +; BE-NEXT: std 6, 8(5) ; BE-NEXT: blr ; ; LE-32BIT-LABEL: shl_32bytes: ; LE-32BIT: # %bb.0: -; LE-32BIT-NEXT: stwu 1, -144(1) -; LE-32BIT-NEXT: mfcr 12 -; LE-32BIT-NEXT: stw 14, 72(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 15, 76(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 16, 80(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 17, 84(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 18, 88(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 19, 92(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 20, 96(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 21, 100(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 22, 104(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 23, 108(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 24, 112(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 25, 116(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 26, 120(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 27, 124(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 28, 128(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 29, 132(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 30, 136(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 31, 140(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 12, 68(1) -; LE-32BIT-NEXT: lwz 0, 28(4) -; LE-32BIT-NEXT: stw 5, 64(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: lwz 6, 24(3) -; LE-32BIT-NEXT: rlwinm. 
30, 0, 3, 0, 28 -; LE-32BIT-NEXT: lwz 5, 28(3) -; LE-32BIT-NEXT: subfic 21, 30, 224 +; LE-32BIT-NEXT: stwu 1, -80(1) +; LE-32BIT-NEXT: lwz 4, 28(4) +; LE-32BIT-NEXT: li 6, 0 ; LE-32BIT-NEXT: lwz 7, 4(3) -; LE-32BIT-NEXT: subfic 0, 30, 160 -; LE-32BIT-NEXT: lwz 9, 0(3) -; LE-32BIT-NEXT: addi 4, 30, -128 +; LE-32BIT-NEXT: lwz 8, 0(3) +; LE-32BIT-NEXT: clrlwi 4, 4, 3 +; LE-32BIT-NEXT: lwz 9, 12(3) +; LE-32BIT-NEXT: cmplwi 4, 32 ; LE-32BIT-NEXT: lwz 10, 8(3) -; LE-32BIT-NEXT: subfic 28, 30, 96 -; LE-32BIT-NEXT: lwz 8, 12(3) -; LE-32BIT-NEXT: addi 29, 30, -64 -; LE-32BIT-NEXT: lwz 12, 16(3) -; LE-32BIT-NEXT: subfic 25, 30, 32 ; LE-32BIT-NEXT: lwz 11, 20(3) -; LE-32BIT-NEXT: addi 3, 30, -192 -; LE-32BIT-NEXT: srw 21, 5, 21 -; LE-32BIT-NEXT: slw 16, 6, 3 -; LE-32BIT-NEXT: stw 3, 56(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: slw 20, 9, 30 -; LE-32BIT-NEXT: srw 15, 11, 0 -; LE-32BIT-NEXT: slw 14, 12, 4 -; LE-32BIT-NEXT: srw 31, 8, 28 -; LE-32BIT-NEXT: slw 3, 10, 29 -; LE-32BIT-NEXT: or 21, 16, 21 -; LE-32BIT-NEXT: srw 16, 7, 25 -; LE-32BIT-NEXT: slw 19, 10, 30 -; LE-32BIT-NEXT: or 15, 14, 15 -; LE-32BIT-NEXT: srw 14, 8, 25 -; LE-32BIT-NEXT: or 3, 3, 31 -; LE-32BIT-NEXT: srw 31, 5, 0 -; LE-32BIT-NEXT: or 20, 20, 16 -; LE-32BIT-NEXT: slw 16, 6, 4 -; LE-32BIT-NEXT: addi 27, 30, -224 -; LE-32BIT-NEXT: or 19, 19, 14 -; LE-32BIT-NEXT: srw 14, 5, 28 -; LE-32BIT-NEXT: or 16, 16, 31 -; LE-32BIT-NEXT: slw 31, 6, 29 -; LE-32BIT-NEXT: addi 23, 30, -160 -; LE-32BIT-NEXT: slw 18, 12, 30 -; LE-32BIT-NEXT: stw 0, 40(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: or 14, 31, 14 -; LE-32BIT-NEXT: srw 31, 11, 25 -; LE-32BIT-NEXT: slw 0, 5, 27 -; LE-32BIT-NEXT: addi 26, 30, -96 -; LE-32BIT-NEXT: slw 17, 6, 30 -; LE-32BIT-NEXT: or 18, 18, 31 -; LE-32BIT-NEXT: srw 31, 5, 25 -; LE-32BIT-NEXT: or 21, 21, 0 -; LE-32BIT-NEXT: slw 0, 11, 23 -; LE-32BIT-NEXT: or 17, 17, 31 -; LE-32BIT-NEXT: addi 31, 30, -32 -; LE-32BIT-NEXT: or 0, 15, 0 -; LE-32BIT-NEXT: slw 15, 8, 26 -; LE-32BIT-NEXT: stw 29, 52(1) # 
4-byte Folded Spill -; LE-32BIT-NEXT: or 29, 3, 15 -; LE-32BIT-NEXT: slw 15, 7, 31 -; LE-32BIT-NEXT: or 20, 20, 15 -; LE-32BIT-NEXT: slw 15, 8, 31 -; LE-32BIT-NEXT: or 3, 19, 15 -; LE-32BIT-NEXT: subfic 15, 30, 128 -; LE-32BIT-NEXT: slw 23, 5, 23 -; LE-32BIT-NEXT: stw 3, 48(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: or 3, 16, 23 -; LE-32BIT-NEXT: subfic 16, 15, 32 -; LE-32BIT-NEXT: stw 3, 44(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: srw 3, 11, 15 -; LE-32BIT-NEXT: slw 22, 12, 16 -; LE-32BIT-NEXT: or 23, 3, 22 -; LE-32BIT-NEXT: subfic 22, 30, 64 -; LE-32BIT-NEXT: stw 9, 60(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: mr 9, 10 -; LE-32BIT-NEXT: subfic 3, 22, 32 -; LE-32BIT-NEXT: stw 4, 36(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: srw 4, 8, 22 -; LE-32BIT-NEXT: slw 24, 9, 3 -; LE-32BIT-NEXT: or 4, 4, 24 -; LE-32BIT-NEXT: subfic 24, 30, 192 -; LE-32BIT-NEXT: subfic 27, 24, 32 -; LE-32BIT-NEXT: mr 10, 26 -; LE-32BIT-NEXT: slw 27, 6, 27 -; LE-32BIT-NEXT: srw 26, 5, 24 -; LE-32BIT-NEXT: stw 28, 24(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: or 27, 26, 27 -; LE-32BIT-NEXT: srw 26, 11, 22 -; LE-32BIT-NEXT: slw 28, 12, 3 -; LE-32BIT-NEXT: or 28, 26, 28 -; LE-32BIT-NEXT: srw 26, 5, 15 -; LE-32BIT-NEXT: slw 19, 6, 16 -; LE-32BIT-NEXT: or 26, 26, 19 -; LE-32BIT-NEXT: slw 19, 5, 10 -; LE-32BIT-NEXT: stw 7, 32(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: mr 7, 9 -; LE-32BIT-NEXT: or 19, 14, 19 -; LE-32BIT-NEXT: slw 14, 11, 31 -; LE-32BIT-NEXT: lwz 9, 64(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 18, 18, 14 -; LE-32BIT-NEXT: slw 3, 6, 3 -; LE-32BIT-NEXT: srw 14, 5, 22 -; LE-32BIT-NEXT: cmplwi 5, 30, 64 -; LE-32BIT-NEXT: cmplwi 1, 30, 128 -; LE-32BIT-NEXT: srw 24, 6, 24 -; LE-32BIT-NEXT: or 10, 14, 3 -; LE-32BIT-NEXT: slw 14, 5, 31 -; LE-32BIT-NEXT: crnand 28, 4, 20 -; LE-32BIT-NEXT: slw 31, 5, 30 -; LE-32BIT-NEXT: or 24, 0, 24 -; LE-32BIT-NEXT: mr 3, 7 -; LE-32BIT-NEXT: stw 7, 28(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: srw 0, 7, 22 -; LE-32BIT-NEXT: lwz 7, 24(1) # 4-byte 
Folded Reload -; LE-32BIT-NEXT: or 17, 17, 14 -; LE-32BIT-NEXT: bc 12, 28, .LBB10_2 -; LE-32BIT-NEXT: # %bb.1: -; LE-32BIT-NEXT: ori 14, 31, 0 -; LE-32BIT-NEXT: b .LBB10_3 +; LE-32BIT-NEXT: lwz 12, 16(3) +; LE-32BIT-NEXT: lwz 0, 28(3) +; LE-32BIT-NEXT: lwz 3, 24(3) +; LE-32BIT-NEXT: stw 6, 76(1) +; LE-32BIT-NEXT: stw 3, 40(1) +; LE-32BIT-NEXT: li 3, 32 +; LE-32BIT-NEXT: stw 6, 72(1) +; LE-32BIT-NEXT: bc 12, 0, .LBB10_1 +; LE-32BIT-NEXT: b .LBB10_2 +; LE-32BIT-NEXT: .LBB10_1: +; LE-32BIT-NEXT: addi 3, 4, 0 ; LE-32BIT-NEXT: .LBB10_2: -; LE-32BIT-NEXT: li 14, 0 -; LE-32BIT-NEXT: .LBB10_3: -; LE-32BIT-NEXT: or 20, 20, 0 -; LE-32BIT-NEXT: subfic 0, 15, 64 -; LE-32BIT-NEXT: stw 14, 28(9) -; LE-32BIT-NEXT: subfic 14, 0, 32 -; LE-32BIT-NEXT: srw 14, 11, 14 -; LE-32BIT-NEXT: slw 31, 12, 0 -; LE-32BIT-NEXT: or 14, 31, 14 -; LE-32BIT-NEXT: srw 31, 12, 7 -; LE-32BIT-NEXT: or 23, 23, 31 -; LE-32BIT-NEXT: srw 31, 3, 25 -; LE-32BIT-NEXT: lwz 3, 40(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 4, 4, 31 -; LE-32BIT-NEXT: slw 0, 11, 0 -; LE-32BIT-NEXT: cmplwi 3, 15, 0 -; LE-32BIT-NEXT: srw 31, 6, 3 -; LE-32BIT-NEXT: or 27, 27, 31 -; LE-32BIT-NEXT: srw 31, 12, 25 -; LE-32BIT-NEXT: or 28, 28, 31 -; LE-32BIT-NEXT: srw 31, 6, 7 -; LE-32BIT-NEXT: or 26, 26, 31 -; LE-32BIT-NEXT: srw 31, 6, 22 -; LE-32BIT-NEXT: or 18, 18, 31 -; LE-32BIT-NEXT: lwz 31, 36(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: srw 25, 6, 25 -; LE-32BIT-NEXT: or 3, 10, 25 -; LE-32BIT-NEXT: or 26, 26, 0 -; LE-32BIT-NEXT: cmplwi 6, 31, 64 -; LE-32BIT-NEXT: slw 0, 11, 30 -; LE-32BIT-NEXT: bc 12, 24, .LBB10_5 -; LE-32BIT-NEXT: # %bb.4: -; LE-32BIT-NEXT: ori 25, 21, 0 -; LE-32BIT-NEXT: b .LBB10_6 -; LE-32BIT-NEXT: .LBB10_5: -; LE-32BIT-NEXT: addi 25, 24, 0 -; LE-32BIT-NEXT: .LBB10_6: -; LE-32BIT-NEXT: slw 24, 11, 16 -; LE-32BIT-NEXT: lwz 10, 32(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 3, 0, 3 -; LE-32BIT-NEXT: bc 12, 28, .LBB10_8 -; LE-32BIT-NEXT: # %bb.7: -; LE-32BIT-NEXT: ori 0, 17, 0 -; LE-32BIT-NEXT: b .LBB10_9 -; 
LE-32BIT-NEXT: .LBB10_8: -; LE-32BIT-NEXT: li 0, 0 -; LE-32BIT-NEXT: .LBB10_9: -; LE-32BIT-NEXT: or 24, 14, 24 -; LE-32BIT-NEXT: stw 0, 24(9) -; LE-32BIT-NEXT: srw 0, 6, 15 -; LE-32BIT-NEXT: or 24, 0, 24 -; LE-32BIT-NEXT: lwz 0, 56(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: slw 21, 10, 30 -; LE-32BIT-NEXT: bc 12, 20, .LBB10_11 -; LE-32BIT-NEXT: # %bb.10: -; LE-32BIT-NEXT: ori 7, 29, 0 -; LE-32BIT-NEXT: b .LBB10_12 -; LE-32BIT-NEXT: .LBB10_11: -; LE-32BIT-NEXT: addi 7, 20, 0 -; LE-32BIT-NEXT: .LBB10_12: -; LE-32BIT-NEXT: or 4, 21, 4 -; LE-32BIT-NEXT: slw 21, 11, 31 -; LE-32BIT-NEXT: srw 20, 12, 15 -; LE-32BIT-NEXT: cmplwi 7, 15, 64 -; LE-32BIT-NEXT: li 15, 0 -; LE-32BIT-NEXT: or 27, 21, 27 -; LE-32BIT-NEXT: bc 12, 20, .LBB10_14 -; LE-32BIT-NEXT: # %bb.13: -; LE-32BIT-NEXT: ori 21, 19, 0 -; LE-32BIT-NEXT: b .LBB10_15 -; LE-32BIT-NEXT: .LBB10_14: -; LE-32BIT-NEXT: addi 21, 18, 0 -; LE-32BIT-NEXT: .LBB10_15: -; LE-32BIT-NEXT: mr 16, 9 -; LE-32BIT-NEXT: lwz 9, 52(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: slw 18, 5, 0 -; LE-32BIT-NEXT: bc 12, 28, .LBB10_17 -; LE-32BIT-NEXT: # %bb.16: -; LE-32BIT-NEXT: ori 0, 15, 0 -; LE-32BIT-NEXT: b .LBB10_18 -; LE-32BIT-NEXT: .LBB10_17: -; LE-32BIT-NEXT: addi 0, 20, 0 -; LE-32BIT-NEXT: .LBB10_18: -; LE-32BIT-NEXT: lwz 20, 60(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: slw 30, 8, 30 -; LE-32BIT-NEXT: slw 19, 8, 9 -; LE-32BIT-NEXT: slw 17, 5, 9 -; LE-32BIT-NEXT: bc 12, 2, .LBB10_20 -; LE-32BIT-NEXT: # %bb.19: -; LE-32BIT-NEXT: ori 9, 7, 0 -; LE-32BIT-NEXT: b .LBB10_21 -; LE-32BIT-NEXT: .LBB10_20: -; LE-32BIT-NEXT: addi 9, 20, 0 -; LE-32BIT-NEXT: .LBB10_21: -; LE-32BIT-NEXT: lwz 7, 48(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: slw 29, 5, 31 -; LE-32BIT-NEXT: or 9, 9, 0 -; LE-32BIT-NEXT: bc 12, 20, .LBB10_23 -; LE-32BIT-NEXT: # %bb.22: -; LE-32BIT-NEXT: ori 0, 15, 0 -; LE-32BIT-NEXT: b .LBB10_24 -; LE-32BIT-NEXT: .LBB10_23: -; LE-32BIT-NEXT: addi 0, 30, 0 -; LE-32BIT-NEXT: .LBB10_24: -; LE-32BIT-NEXT: bc 12, 24, .LBB10_26 -; 
LE-32BIT-NEXT: # %bb.25: -; LE-32BIT-NEXT: ori 30, 15, 0 -; LE-32BIT-NEXT: b .LBB10_27 -; LE-32BIT-NEXT: .LBB10_26: -; LE-32BIT-NEXT: addi 30, 29, 0 -; LE-32BIT-NEXT: .LBB10_27: -; LE-32BIT-NEXT: bc 12, 28, .LBB10_28 -; LE-32BIT-NEXT: b .LBB10_29 -; LE-32BIT-NEXT: .LBB10_28: -; LE-32BIT-NEXT: addi 28, 26, 0 -; LE-32BIT-NEXT: .LBB10_29: -; LE-32BIT-NEXT: bc 12, 20, .LBB10_31 -; LE-32BIT-NEXT: # %bb.30: -; LE-32BIT-NEXT: ori 3, 17, 0 -; LE-32BIT-NEXT: b .LBB10_31 -; LE-32BIT-NEXT: .LBB10_31: -; LE-32BIT-NEXT: srw 22, 12, 22 -; LE-32BIT-NEXT: bc 12, 20, .LBB10_33 -; LE-32BIT-NEXT: # %bb.32: -; LE-32BIT-NEXT: ori 29, 15, 0 -; LE-32BIT-NEXT: b .LBB10_34 -; LE-32BIT-NEXT: .LBB10_33: -; LE-32BIT-NEXT: addi 29, 7, 0 -; LE-32BIT-NEXT: .LBB10_34: -; LE-32BIT-NEXT: lwz 7, 44(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: bc 12, 20, .LBB10_36 -; LE-32BIT-NEXT: # %bb.35: -; LE-32BIT-NEXT: ori 4, 19, 0 -; LE-32BIT-NEXT: b .LBB10_36 -; LE-32BIT-NEXT: .LBB10_36: -; LE-32BIT-NEXT: bc 12, 14, .LBB10_38 -; LE-32BIT-NEXT: # %bb.37: -; LE-32BIT-NEXT: ori 5, 28, 0 -; LE-32BIT-NEXT: b .LBB10_38 -; LE-32BIT-NEXT: .LBB10_38: -; LE-32BIT-NEXT: li 28, 0 -; LE-32BIT-NEXT: bc 12, 2, .LBB10_39 -; LE-32BIT-NEXT: b .LBB10_40 -; LE-32BIT-NEXT: .LBB10_39: -; LE-32BIT-NEXT: addi 3, 11, 0 -; LE-32BIT-NEXT: .LBB10_40: -; LE-32BIT-NEXT: cmplwi 2, 31, 0 -; LE-32BIT-NEXT: bc 12, 24, .LBB10_42 -; LE-32BIT-NEXT: # %bb.41: -; LE-32BIT-NEXT: ori 27, 18, 0 -; LE-32BIT-NEXT: b .LBB10_42 -; LE-32BIT-NEXT: .LBB10_42: -; LE-32BIT-NEXT: bc 12, 28, .LBB10_44 -; LE-32BIT-NEXT: # %bb.43: -; LE-32BIT-NEXT: ori 26, 22, 0 -; LE-32BIT-NEXT: b .LBB10_45 -; LE-32BIT-NEXT: .LBB10_44: -; LE-32BIT-NEXT: addi 26, 24, 0 -; LE-32BIT-NEXT: .LBB10_45: -; LE-32BIT-NEXT: bc 12, 2, .LBB10_46 -; LE-32BIT-NEXT: b .LBB10_47 -; LE-32BIT-NEXT: .LBB10_46: -; LE-32BIT-NEXT: addi 4, 10, 0 -; LE-32BIT-NEXT: .LBB10_47: -; LE-32BIT-NEXT: bc 12, 4, .LBB10_49 -; LE-32BIT-NEXT: # %bb.48: -; LE-32BIT-NEXT: ori 3, 28, 0 -; LE-32BIT-NEXT: b .LBB10_49 -; 
LE-32BIT-NEXT: .LBB10_49: -; LE-32BIT-NEXT: bc 12, 10, .LBB10_50 -; LE-32BIT-NEXT: b .LBB10_51 -; LE-32BIT-NEXT: .LBB10_50: -; LE-32BIT-NEXT: addi 25, 12, 0 -; LE-32BIT-NEXT: .LBB10_51: -; LE-32BIT-NEXT: or 5, 0, 5 -; LE-32BIT-NEXT: bc 12, 24, .LBB10_53 -; LE-32BIT-NEXT: # %bb.52: -; LE-32BIT-NEXT: ori 24, 15, 0 -; LE-32BIT-NEXT: b .LBB10_54 -; LE-32BIT-NEXT: .LBB10_53: -; LE-32BIT-NEXT: addi 24, 7, 0 -; LE-32BIT-NEXT: .LBB10_54: -; LE-32BIT-NEXT: bc 12, 28, .LBB10_56 -; LE-32BIT-NEXT: # %bb.55: -; LE-32BIT-NEXT: ori 7, 15, 0 -; LE-32BIT-NEXT: b .LBB10_57 -; LE-32BIT-NEXT: .LBB10_56: -; LE-32BIT-NEXT: addi 7, 23, 0 -; LE-32BIT-NEXT: .LBB10_57: -; LE-32BIT-NEXT: bc 12, 10, .LBB10_58 -; LE-32BIT-NEXT: b .LBB10_59 -; LE-32BIT-NEXT: .LBB10_58: -; LE-32BIT-NEXT: addi 27, 11, 0 -; LE-32BIT-NEXT: .LBB10_59: -; LE-32BIT-NEXT: stw 3, 20(16) -; LE-32BIT-NEXT: or 3, 4, 7 -; LE-32BIT-NEXT: bc 12, 4, .LBB10_61 -; LE-32BIT-NEXT: # %bb.60: -; LE-32BIT-NEXT: ori 3, 27, 0 -; LE-32BIT-NEXT: ori 9, 25, 0 -; LE-32BIT-NEXT: b .LBB10_61 -; LE-32BIT-NEXT: .LBB10_61: -; LE-32BIT-NEXT: bc 12, 14, .LBB10_63 -; LE-32BIT-NEXT: # %bb.62: -; LE-32BIT-NEXT: ori 6, 26, 0 -; LE-32BIT-NEXT: b .LBB10_63 -; LE-32BIT-NEXT: .LBB10_63: -; LE-32BIT-NEXT: bc 12, 2, .LBB10_65 -; LE-32BIT-NEXT: # %bb.64: -; LE-32BIT-NEXT: ori 12, 21, 0 -; LE-32BIT-NEXT: b .LBB10_65 -; LE-32BIT-NEXT: .LBB10_65: -; LE-32BIT-NEXT: bc 12, 4, .LBB10_67 -; LE-32BIT-NEXT: # %bb.66: -; LE-32BIT-NEXT: ori 5, 30, 0 -; LE-32BIT-NEXT: b .LBB10_67 -; LE-32BIT-NEXT: .LBB10_67: -; LE-32BIT-NEXT: bc 12, 2, .LBB10_69 -; LE-32BIT-NEXT: # %bb.68: -; LE-32BIT-NEXT: ori 4, 9, 0 -; LE-32BIT-NEXT: b .LBB10_70 -; LE-32BIT-NEXT: .LBB10_69: -; LE-32BIT-NEXT: addi 3, 10, 0 -; LE-32BIT-NEXT: addi 4, 20, 0 -; LE-32BIT-NEXT: .LBB10_70: -; LE-32BIT-NEXT: bc 12, 4, .LBB10_72 -; LE-32BIT-NEXT: # %bb.71: -; LE-32BIT-NEXT: ori 12, 15, 0 -; LE-32BIT-NEXT: b .LBB10_72 -; LE-32BIT-NEXT: .LBB10_72: -; LE-32BIT-NEXT: bc 12, 2, .LBB10_73 -; LE-32BIT-NEXT: b 
.LBB10_74 -; LE-32BIT-NEXT: .LBB10_73: -; LE-32BIT-NEXT: addi 5, 8, 0 -; LE-32BIT-NEXT: .LBB10_74: -; LE-32BIT-NEXT: stw 3, 4(16) -; LE-32BIT-NEXT: lwz 3, 28(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: stw 4, 0(16) -; LE-32BIT-NEXT: or 4, 29, 6 -; LE-32BIT-NEXT: bc 12, 4, .LBB10_76 -; LE-32BIT-NEXT: # %bb.75: -; LE-32BIT-NEXT: ori 4, 24, 0 -; LE-32BIT-NEXT: b .LBB10_76 -; LE-32BIT-NEXT: .LBB10_76: -; LE-32BIT-NEXT: stw 12, 16(16) -; LE-32BIT-NEXT: bc 12, 2, .LBB10_78 -; LE-32BIT-NEXT: # %bb.77: -; LE-32BIT-NEXT: ori 3, 4, 0 -; LE-32BIT-NEXT: b .LBB10_78 -; LE-32BIT-NEXT: .LBB10_78: -; LE-32BIT-NEXT: stw 5, 12(16) -; LE-32BIT-NEXT: stw 3, 8(16) -; LE-32BIT-NEXT: lwz 12, 68(1) -; LE-32BIT-NEXT: lwz 31, 140(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: mtcrf 32, 12 # cr2 -; LE-32BIT-NEXT: mtcrf 16, 12 # cr3 -; LE-32BIT-NEXT: lwz 30, 136(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 29, 132(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 28, 128(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 27, 124(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 26, 120(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 25, 116(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 24, 112(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 23, 108(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 22, 104(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 21, 100(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 20, 96(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 19, 92(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 18, 88(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 17, 84(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 16, 80(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 15, 76(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 14, 72(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: addi 1, 1, 144 +; LE-32BIT-NEXT: stw 6, 68(1) +; LE-32BIT-NEXT: stw 6, 64(1) +; LE-32BIT-NEXT: stw 6, 60(1) +; LE-32BIT-NEXT: stw 6, 56(1) +; LE-32BIT-NEXT: stw 6, 52(1) +; LE-32BIT-NEXT: stw 6, 48(1) +; LE-32BIT-NEXT: 
addi 6, 1, 16 +; LE-32BIT-NEXT: stw 0, 44(1) +; LE-32BIT-NEXT: stw 12, 32(1) +; LE-32BIT-NEXT: stw 11, 36(1) +; LE-32BIT-NEXT: stw 10, 24(1) +; LE-32BIT-NEXT: stw 9, 28(1) +; LE-32BIT-NEXT: stw 8, 16(1) +; LE-32BIT-NEXT: stw 7, 20(1) +; LE-32BIT-NEXT: lwzux 4, 3, 6 +; LE-32BIT-NEXT: lwz 6, 4(3) +; LE-32BIT-NEXT: lwz 7, 12(3) +; LE-32BIT-NEXT: lwz 8, 8(3) +; LE-32BIT-NEXT: lwz 9, 20(3) +; LE-32BIT-NEXT: lwz 10, 16(3) +; LE-32BIT-NEXT: lwz 11, 28(3) +; LE-32BIT-NEXT: lwz 3, 24(3) +; LE-32BIT-NEXT: stw 4, 0(5) +; LE-32BIT-NEXT: stw 3, 24(5) +; LE-32BIT-NEXT: stw 11, 28(5) +; LE-32BIT-NEXT: stw 10, 16(5) +; LE-32BIT-NEXT: stw 9, 20(5) +; LE-32BIT-NEXT: stw 8, 8(5) +; LE-32BIT-NEXT: stw 7, 12(5) +; LE-32BIT-NEXT: stw 6, 4(5) +; LE-32BIT-NEXT: addi 1, 1, 80 ; LE-32BIT-NEXT: blr %src = load i256, ptr %src.ptr, align 1 %byteOff = load i256, ptr %byteOff.ptr, align 1 @@ -1777,602 +699,126 @@ ; LE-64BIT-LABEL: ashr_32bytes: ; LE-64BIT: # %bb.0: ; LE-64BIT-NEXT: lwz 4, 0(4) -; LE-64BIT-NEXT: ld 6, 24(3) +; LE-64BIT-NEXT: ld 7, 24(3) ; LE-64BIT-NEXT: ld 8, 16(3) -; LE-64BIT-NEXT: std 28, -32(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: std 29, -24(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: std 30, -16(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: std 26, -48(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: rlwinm. 
4, 4, 3, 0, 28 -; LE-64BIT-NEXT: sradi 9, 6, 63 -; LE-64BIT-NEXT: subfic 10, 4, 192 -; LE-64BIT-NEXT: addi 11, 4, -128 -; LE-64BIT-NEXT: addi 30, 4, -192 -; LE-64BIT-NEXT: sld 10, 6, 10 -; LE-64BIT-NEXT: srd 29, 8, 11 -; LE-64BIT-NEXT: subfic 28, 4, 64 -; LE-64BIT-NEXT: std 27, -40(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: ld 7, 0(3) -; LE-64BIT-NEXT: ld 3, 8(3) -; LE-64BIT-NEXT: srd 0, 8, 4 -; LE-64BIT-NEXT: srad 27, 6, 30 -; LE-64BIT-NEXT: or 10, 29, 10 -; LE-64BIT-NEXT: std 25, -56(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: cmpwi 1, 30, 1 -; LE-64BIT-NEXT: sld 26, 6, 28 -; LE-64BIT-NEXT: addi 30, 4, -64 -; LE-64BIT-NEXT: isel 10, 10, 27, 4 -; LE-64BIT-NEXT: or 27, 0, 26 -; LE-64BIT-NEXT: subfic 0, 4, 128 -; LE-64BIT-NEXT: srd 12, 7, 4 -; LE-64BIT-NEXT: sld 26, 3, 28 -; LE-64BIT-NEXT: subfic 25, 0, 64 -; LE-64BIT-NEXT: srad 29, 6, 30 -; LE-64BIT-NEXT: cmpwi 1, 30, 1 -; LE-64BIT-NEXT: or 12, 12, 26 -; LE-64BIT-NEXT: srd 30, 3, 30 -; LE-64BIT-NEXT: sld 28, 8, 28 -; LE-64BIT-NEXT: srd 26, 8, 25 -; LE-64BIT-NEXT: sld 8, 8, 0 -; LE-64BIT-NEXT: or 12, 12, 30 -; LE-64BIT-NEXT: ld 30, -16(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: ld 25, -56(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: sld 0, 6, 0 -; LE-64BIT-NEXT: isel 29, 27, 29, 4 -; LE-64BIT-NEXT: or 8, 12, 8 -; LE-64BIT-NEXT: or 0, 0, 26 -; LE-64BIT-NEXT: cmplwi 1, 4, 128 -; LE-64BIT-NEXT: ld 26, -48(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: srd 27, 3, 4 -; LE-64BIT-NEXT: or 0, 0, 28 -; LE-64BIT-NEXT: ld 28, -32(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: srad 11, 6, 11 -; LE-64BIT-NEXT: isel 8, 8, 10, 4 -; LE-64BIT-NEXT: or 10, 27, 0 -; LE-64BIT-NEXT: ld 27, -40(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: iseleq 7, 7, 8 -; LE-64BIT-NEXT: srad 4, 6, 4 -; LE-64BIT-NEXT: isel 8, 10, 11, 4 -; LE-64BIT-NEXT: std 7, 0(5) -; LE-64BIT-NEXT: isel 12, 29, 9, 4 -; LE-64BIT-NEXT: ld 29, -24(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: iseleq 3, 3, 8 -; LE-64BIT-NEXT: std 12, 16(5) -; LE-64BIT-NEXT: isel 4, 4, 9, 4 -; 
LE-64BIT-NEXT: std 3, 8(5) -; LE-64BIT-NEXT: std 4, 24(5) +; LE-64BIT-NEXT: lxvd2x 0, 0, 3 +; LE-64BIT-NEXT: addi 6, 1, -64 +; LE-64BIT-NEXT: clrlwi 3, 4, 3 +; LE-64BIT-NEXT: li 4, 32 +; LE-64BIT-NEXT: cmplwi 3, 32 +; LE-64BIT-NEXT: sradi 9, 7, 63 +; LE-64BIT-NEXT: std 8, 16(6) +; LE-64BIT-NEXT: std 7, 24(6) +; LE-64BIT-NEXT: isellt 3, 3, 4 +; LE-64BIT-NEXT: std 9, 56(6) +; LE-64BIT-NEXT: std 9, 48(6) +; LE-64BIT-NEXT: li 4, 16 +; LE-64BIT-NEXT: add 7, 6, 3 +; LE-64BIT-NEXT: std 9, 40(6) +; LE-64BIT-NEXT: std 9, 32(6) +; LE-64BIT-NEXT: stxvd2x 0, 0, 6 +; LE-64BIT-NEXT: lxvd2x 0, 6, 3 +; LE-64BIT-NEXT: lxvd2x 1, 7, 4 +; LE-64BIT-NEXT: stxvd2x 1, 5, 4 +; LE-64BIT-NEXT: stxvd2x 0, 0, 5 ; LE-64BIT-NEXT: blr ; ; BE-LABEL: ashr_32bytes: ; BE: # %bb.0: ; BE-NEXT: lwz 4, 28(4) -; BE-NEXT: ld 6, 16(3) -; BE-NEXT: ld 7, 24(3) +; BE-NEXT: ld 7, 0(3) ; BE-NEXT: ld 8, 8(3) -; BE-NEXT: ld 3, 0(3) -; BE-NEXT: std 27, -40(1) # 8-byte Folded Spill -; BE-NEXT: std 29, -24(1) # 8-byte Folded Spill -; BE-NEXT: std 28, -32(1) # 8-byte Folded Spill -; BE-NEXT: std 30, -16(1) # 8-byte Folded Spill -; BE-NEXT: rlwinm. 
4, 4, 3, 0, 28 -; BE-NEXT: subfic 9, 4, 192 -; BE-NEXT: addi 10, 4, -128 -; BE-NEXT: addi 11, 4, -192 -; BE-NEXT: subfic 0, 4, 64 -; BE-NEXT: sld 9, 3, 9 -; BE-NEXT: srd 27, 8, 10 -; BE-NEXT: srd 12, 7, 4 -; BE-NEXT: subfic 29, 4, 128 -; BE-NEXT: cmpwi 1, 11, 1 -; BE-NEXT: srad 11, 3, 11 -; BE-NEXT: or 9, 27, 9 -; BE-NEXT: sld 27, 6, 0 -; BE-NEXT: addi 30, 4, -64 -; BE-NEXT: srd 28, 8, 4 -; BE-NEXT: or 12, 12, 27 -; BE-NEXT: sld 27, 3, 0 -; BE-NEXT: bc 12, 4, .LBB11_2 +; BE-NEXT: ld 9, 24(3) +; BE-NEXT: ld 3, 16(3) +; BE-NEXT: li 6, 32 +; BE-NEXT: addi 10, 1, -64 +; BE-NEXT: clrlwi 4, 4, 3 +; BE-NEXT: cmplwi 4, 32 +; BE-NEXT: bc 12, 0, .LBB11_2 ; BE-NEXT: # %bb.1: -; BE-NEXT: ori 9, 11, 0 +; BE-NEXT: ori 4, 6, 0 ; BE-NEXT: b .LBB11_2 ; BE-NEXT: .LBB11_2: -; BE-NEXT: subfic 11, 29, 64 -; BE-NEXT: or 28, 28, 27 -; BE-NEXT: srd 27, 6, 30 -; BE-NEXT: sld 0, 8, 0 -; BE-NEXT: srd 11, 8, 11 -; BE-NEXT: sld 8, 8, 29 -; BE-NEXT: sld 29, 3, 29 -; BE-NEXT: cmplwi 1, 4, 128 -; BE-NEXT: or 12, 12, 27 -; BE-NEXT: or 11, 29, 11 -; BE-NEXT: or 8, 12, 8 -; BE-NEXT: srd 12, 6, 4 -; BE-NEXT: or 11, 11, 0 -; BE-NEXT: srad 10, 3, 10 -; BE-NEXT: srad 29, 3, 30 -; BE-NEXT: or 11, 12, 11 -; BE-NEXT: cmpwi 5, 30, 1 -; BE-NEXT: bc 12, 20, .LBB11_4 -; BE-NEXT: # %bb.3: -; BE-NEXT: ori 12, 29, 0 -; BE-NEXT: b .LBB11_5 -; BE-NEXT: .LBB11_4: -; BE-NEXT: addi 12, 28, 0 -; BE-NEXT: .LBB11_5: -; BE-NEXT: bc 12, 4, .LBB11_7 -; BE-NEXT: # %bb.6: -; BE-NEXT: ori 8, 9, 0 -; BE-NEXT: ori 9, 10, 0 -; BE-NEXT: b .LBB11_8 -; BE-NEXT: .LBB11_7: -; BE-NEXT: addi 9, 11, 0 -; BE-NEXT: .LBB11_8: -; BE-NEXT: sradi 10, 3, 63 -; BE-NEXT: srad 3, 3, 4 -; BE-NEXT: ld 30, -16(1) # 8-byte Folded Reload -; BE-NEXT: ld 29, -24(1) # 8-byte Folded Reload -; BE-NEXT: ld 28, -32(1) # 8-byte Folded Reload -; BE-NEXT: ld 27, -40(1) # 8-byte Folded Reload -; BE-NEXT: bc 12, 2, .LBB11_10 -; BE-NEXT: # %bb.9: -; BE-NEXT: ori 4, 8, 0 -; BE-NEXT: ori 6, 9, 0 -; BE-NEXT: b .LBB11_11 -; BE-NEXT: .LBB11_10: -; BE-NEXT: addi 4, 7, 0 
-; BE-NEXT: .LBB11_11: -; BE-NEXT: bc 12, 4, .LBB11_13 -; BE-NEXT: # %bb.12: -; BE-NEXT: ori 7, 10, 0 -; BE-NEXT: ori 3, 10, 0 -; BE-NEXT: b .LBB11_14 -; BE-NEXT: .LBB11_13: -; BE-NEXT: addi 7, 12, 0 -; BE-NEXT: .LBB11_14: +; BE-NEXT: sradi 6, 7, 63 +; BE-NEXT: std 3, 48(10) +; BE-NEXT: std 9, 56(10) +; BE-NEXT: std 7, 32(10) +; BE-NEXT: std 8, 40(10) +; BE-NEXT: neg 4, 4 +; BE-NEXT: std 6, 24(10) +; BE-NEXT: std 6, 16(10) +; BE-NEXT: std 6, 8(10) +; BE-NEXT: std 6, -64(1) +; BE-NEXT: clrldi 3, 4, 32 +; BE-NEXT: addi 4, 1, -32 +; BE-NEXT: ldux 3, 4, 3 +; BE-NEXT: ld 6, 8(4) +; BE-NEXT: ld 7, 24(4) +; BE-NEXT: ld 4, 16(4) ; BE-NEXT: std 3, 0(5) -; BE-NEXT: std 7, 8(5) -; BE-NEXT: std 4, 24(5) -; BE-NEXT: std 6, 16(5) +; BE-NEXT: std 4, 16(5) +; BE-NEXT: std 7, 24(5) +; BE-NEXT: std 6, 8(5) ; BE-NEXT: blr ; ; LE-32BIT-LABEL: ashr_32bytes: ; LE-32BIT: # %bb.0: -; LE-32BIT-NEXT: stwu 1, -144(1) -; LE-32BIT-NEXT: mfcr 12 -; LE-32BIT-NEXT: stw 14, 72(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 15, 76(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 16, 80(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 17, 84(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 18, 88(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 19, 92(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 20, 96(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 21, 100(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 22, 104(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 23, 108(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 24, 112(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 25, 116(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 26, 120(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 27, 124(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 28, 128(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 29, 132(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 30, 136(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 31, 140(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 12, 68(1) -; LE-32BIT-NEXT: lwz 0, 28(4) -; LE-32BIT-NEXT: lwz 
29, 4(3) -; LE-32BIT-NEXT: lwz 12, 0(3) -; LE-32BIT-NEXT: rlwinm. 30, 0, 3, 0, 28 -; LE-32BIT-NEXT: stw 5, 64(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: subfic 23, 30, 224 -; LE-32BIT-NEXT: lwz 5, 24(3) -; LE-32BIT-NEXT: addi 21, 30, -224 -; LE-32BIT-NEXT: lwz 8, 28(3) -; LE-32BIT-NEXT: subfic 4, 30, 160 -; LE-32BIT-NEXT: lwz 10, 20(3) -; LE-32BIT-NEXT: addi 11, 30, -128 -; LE-32BIT-NEXT: lwz 9, 16(3) -; LE-32BIT-NEXT: subfic 25, 30, 96 -; LE-32BIT-NEXT: lwz 26, 12(3) -; LE-32BIT-NEXT: addi 0, 30, -64 -; LE-32BIT-NEXT: lwz 7, 8(3) -; LE-32BIT-NEXT: addi 3, 30, -192 -; LE-32BIT-NEXT: subfic 27, 30, 32 -; LE-32BIT-NEXT: slw 23, 12, 23 -; LE-32BIT-NEXT: srw 16, 29, 3 -; LE-32BIT-NEXT: stw 3, 56(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: srw 20, 8, 30 -; LE-32BIT-NEXT: sraw 15, 12, 21 -; LE-32BIT-NEXT: cmpwi 1, 21, 1 -; LE-32BIT-NEXT: slw 21, 7, 4 -; LE-32BIT-NEXT: srw 14, 26, 11 -; LE-32BIT-NEXT: slw 31, 9, 25 -; LE-32BIT-NEXT: srw 3, 10, 0 -; LE-32BIT-NEXT: or 23, 16, 23 -; LE-32BIT-NEXT: slw 16, 5, 27 -; LE-32BIT-NEXT: srw 19, 10, 30 -; LE-32BIT-NEXT: or 21, 14, 21 -; LE-32BIT-NEXT: slw 14, 9, 27 -; LE-32BIT-NEXT: or 3, 3, 31 -; LE-32BIT-NEXT: slw 31, 12, 4 -; LE-32BIT-NEXT: or 20, 20, 16 -; LE-32BIT-NEXT: srw 16, 29, 11 -; LE-32BIT-NEXT: or 19, 19, 14 -; LE-32BIT-NEXT: slw 14, 12, 25 -; LE-32BIT-NEXT: or 16, 16, 31 -; LE-32BIT-NEXT: srw 31, 29, 0 -; LE-32BIT-NEXT: addi 24, 30, -160 -; LE-32BIT-NEXT: srw 18, 26, 30 -; LE-32BIT-NEXT: or 14, 31, 14 -; LE-32BIT-NEXT: slw 31, 7, 27 -; LE-32BIT-NEXT: addi 28, 30, -96 -; LE-32BIT-NEXT: srw 17, 29, 30 -; LE-32BIT-NEXT: stw 4, 32(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: or 18, 18, 31 -; LE-32BIT-NEXT: slw 31, 12, 27 -; LE-32BIT-NEXT: bc 12, 4, .LBB11_2 +; LE-32BIT-NEXT: stwu 1, -80(1) +; LE-32BIT-NEXT: lwz 4, 28(4) +; LE-32BIT-NEXT: li 6, 32 +; LE-32BIT-NEXT: lwz 7, 4(3) +; LE-32BIT-NEXT: lwz 8, 0(3) +; LE-32BIT-NEXT: clrlwi 4, 4, 3 +; LE-32BIT-NEXT: lwz 9, 12(3) +; LE-32BIT-NEXT: cmplwi 4, 32 +; LE-32BIT-NEXT: lwz 10, 
8(3) +; LE-32BIT-NEXT: lwz 11, 20(3) +; LE-32BIT-NEXT: lwz 12, 16(3) +; LE-32BIT-NEXT: lwz 0, 28(3) +; LE-32BIT-NEXT: lwz 3, 24(3) +; LE-32BIT-NEXT: stw 7, 52(1) +; LE-32BIT-NEXT: addi 7, 1, 48 +; LE-32BIT-NEXT: stw 3, 72(1) +; LE-32BIT-NEXT: srawi 3, 8, 31 +; LE-32BIT-NEXT: stw 3, 44(1) +; LE-32BIT-NEXT: stw 3, 40(1) +; LE-32BIT-NEXT: stw 3, 36(1) +; LE-32BIT-NEXT: stw 3, 32(1) +; LE-32BIT-NEXT: stw 3, 28(1) +; LE-32BIT-NEXT: stw 3, 24(1) +; LE-32BIT-NEXT: stw 3, 20(1) +; LE-32BIT-NEXT: stw 3, 16(1) +; LE-32BIT-NEXT: bc 12, 0, .LBB11_2 ; LE-32BIT-NEXT: # %bb.1: -; LE-32BIT-NEXT: ori 4, 15, 0 +; LE-32BIT-NEXT: ori 3, 6, 0 ; LE-32BIT-NEXT: b .LBB11_3 ; LE-32BIT-NEXT: .LBB11_2: -; LE-32BIT-NEXT: addi 4, 23, 0 +; LE-32BIT-NEXT: addi 3, 4, 0 ; LE-32BIT-NEXT: .LBB11_3: -; LE-32BIT-NEXT: srw 15, 7, 24 -; LE-32BIT-NEXT: or 17, 17, 31 -; LE-32BIT-NEXT: addi 31, 30, -32 -; LE-32BIT-NEXT: or 21, 21, 15 -; LE-32BIT-NEXT: srw 15, 9, 28 -; LE-32BIT-NEXT: or 3, 3, 15 -; LE-32BIT-NEXT: srw 15, 5, 31 -; LE-32BIT-NEXT: or 20, 20, 15 -; LE-32BIT-NEXT: srw 15, 9, 31 -; LE-32BIT-NEXT: stw 3, 28(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: or 3, 19, 15 -; LE-32BIT-NEXT: subfic 15, 30, 64 -; LE-32BIT-NEXT: stw 4, 24(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: cmpwi 1, 24, 1 -; LE-32BIT-NEXT: sraw 24, 12, 24 -; LE-32BIT-NEXT: subfic 4, 15, 32 -; LE-32BIT-NEXT: stw 0, 52(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: srw 0, 26, 4 -; LE-32BIT-NEXT: stw 3, 48(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: bc 12, 4, .LBB11_5 -; LE-32BIT-NEXT: # %bb.4: -; LE-32BIT-NEXT: ori 3, 24, 0 -; LE-32BIT-NEXT: b .LBB11_6 -; LE-32BIT-NEXT: .LBB11_5: -; LE-32BIT-NEXT: addi 3, 16, 0 -; LE-32BIT-NEXT: .LBB11_6: -; LE-32BIT-NEXT: slw 16, 7, 15 -; LE-32BIT-NEXT: or 0, 16, 0 -; LE-32BIT-NEXT: subfic 16, 30, 128 -; LE-32BIT-NEXT: stw 5, 36(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: subfic 5, 16, 32 -; LE-32BIT-NEXT: stw 3, 44(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: slw 3, 12, 16 -; LE-32BIT-NEXT: srw 22, 29, 5 -; 
LE-32BIT-NEXT: stw 8, 60(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: mr 8, 10 -; LE-32BIT-NEXT: mr 10, 27 -; LE-32BIT-NEXT: or 23, 3, 22 -; LE-32BIT-NEXT: slw 22, 7, 16 -; LE-32BIT-NEXT: srw 27, 26, 5 -; LE-32BIT-NEXT: stw 11, 40(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: mr 6, 26 -; LE-32BIT-NEXT: or 11, 22, 27 -; LE-32BIT-NEXT: slw 22, 9, 15 -; LE-32BIT-NEXT: srw 26, 8, 4 -; LE-32BIT-NEXT: subfic 3, 30, 192 -; LE-32BIT-NEXT: or 26, 22, 26 -; LE-32BIT-NEXT: cmpwi 1, 28, 1 -; LE-32BIT-NEXT: sraw 22, 12, 28 -; LE-32BIT-NEXT: subfic 19, 3, 32 -; LE-32BIT-NEXT: srw 4, 29, 4 -; LE-32BIT-NEXT: slw 28, 12, 15 -; LE-32BIT-NEXT: stw 9, 20(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: srw 19, 29, 19 -; LE-32BIT-NEXT: slw 24, 12, 3 -; LE-32BIT-NEXT: or 9, 28, 4 -; LE-32BIT-NEXT: lwz 4, 64(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 24, 24, 19 -; LE-32BIT-NEXT: bc 12, 4, .LBB11_7 -; LE-32BIT-NEXT: b .LBB11_8 -; LE-32BIT-NEXT: .LBB11_7: -; LE-32BIT-NEXT: addi 22, 14, 0 -; LE-32BIT-NEXT: .LBB11_8: -; LE-32BIT-NEXT: srw 19, 7, 31 -; LE-32BIT-NEXT: cmplwi 5, 30, 64 -; LE-32BIT-NEXT: cmplwi 1, 30, 128 -; LE-32BIT-NEXT: slw 3, 29, 3 -; LE-32BIT-NEXT: or 19, 18, 19 -; LE-32BIT-NEXT: cmpwi 6, 31, 1 -; LE-32BIT-NEXT: sraw 18, 12, 31 -; LE-32BIT-NEXT: crand 28, 4, 20 -; LE-32BIT-NEXT: srawi 14, 12, 31 -; LE-32BIT-NEXT: sraw 31, 12, 30 -; LE-32BIT-NEXT: or 3, 21, 3 -; LE-32BIT-NEXT: slw 21, 8, 15 -; LE-32BIT-NEXT: bc 12, 24, .LBB11_10 -; LE-32BIT-NEXT: # %bb.9: -; LE-32BIT-NEXT: ori 28, 18, 0 -; LE-32BIT-NEXT: b .LBB11_11 -; LE-32BIT-NEXT: .LBB11_10: -; LE-32BIT-NEXT: addi 28, 17, 0 -; LE-32BIT-NEXT: .LBB11_11: -; LE-32BIT-NEXT: bc 12, 28, .LBB11_13 -; LE-32BIT-NEXT: # %bb.12: -; LE-32BIT-NEXT: ori 18, 14, 0 -; LE-32BIT-NEXT: b .LBB11_14 -; LE-32BIT-NEXT: .LBB11_13: -; LE-32BIT-NEXT: addi 18, 31, 0 -; LE-32BIT-NEXT: .LBB11_14: -; LE-32BIT-NEXT: or 21, 20, 21 -; LE-32BIT-NEXT: subfic 20, 16, 64 -; LE-32BIT-NEXT: stw 18, 0(4) -; LE-32BIT-NEXT: subfic 18, 20, 32 -; LE-32BIT-NEXT: slw 18, 7, 
18 -; LE-32BIT-NEXT: srw 17, 6, 20 -; LE-32BIT-NEXT: or 18, 17, 18 -; LE-32BIT-NEXT: slw 17, 6, 10 -; LE-32BIT-NEXT: or 27, 0, 17 -; LE-32BIT-NEXT: slw 0, 29, 25 -; LE-32BIT-NEXT: mr 31, 8 -; LE-32BIT-NEXT: or 8, 23, 0 -; LE-32BIT-NEXT: slw 0, 6, 25 -; LE-32BIT-NEXT: or 11, 11, 0 -; LE-32BIT-NEXT: stw 11, 16(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: slw 0, 31, 10 -; LE-32BIT-NEXT: lwz 11, 32(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 0, 26, 0 -; LE-32BIT-NEXT: slw 25, 29, 10 -; LE-32BIT-NEXT: or 23, 9, 25 -; LE-32BIT-NEXT: slw 26, 29, 11 -; LE-32BIT-NEXT: or 26, 24, 26 -; LE-32BIT-NEXT: slw 24, 29, 15 -; LE-32BIT-NEXT: or 24, 19, 24 -; LE-32BIT-NEXT: lwz 19, 40(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: srw 25, 7, 20 -; LE-32BIT-NEXT: lwz 9, 24(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 8, 8, 25 -; LE-32BIT-NEXT: cmplwi 6, 19, 64 -; LE-32BIT-NEXT: srw 5, 7, 5 -; LE-32BIT-NEXT: bc 12, 24, .LBB11_16 -; LE-32BIT-NEXT: # %bb.15: -; LE-32BIT-NEXT: ori 3, 9, 0 -; LE-32BIT-NEXT: b .LBB11_16 -; LE-32BIT-NEXT: .LBB11_16: -; LE-32BIT-NEXT: lwz 9, 28(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 5, 18, 5 -; LE-32BIT-NEXT: lwz 17, 20(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: mr 18, 4 -; LE-32BIT-NEXT: bc 12, 20, .LBB11_18 -; LE-32BIT-NEXT: # %bb.17: -; LE-32BIT-NEXT: ori 10, 9, 0 -; LE-32BIT-NEXT: b .LBB11_19 -; LE-32BIT-NEXT: .LBB11_18: -; LE-32BIT-NEXT: addi 10, 21, 0 -; LE-32BIT-NEXT: .LBB11_19: -; LE-32BIT-NEXT: lwz 9, 36(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: bc 12, 20, .LBB11_21 -; LE-32BIT-NEXT: # %bb.20: -; LE-32BIT-NEXT: ori 24, 22, 0 -; LE-32BIT-NEXT: b .LBB11_21 -; LE-32BIT-NEXT: .LBB11_21: -; LE-32BIT-NEXT: cmplwi 2, 19, 0 -; LE-32BIT-NEXT: bc 12, 10, .LBB11_22 -; LE-32BIT-NEXT: b .LBB11_23 -; LE-32BIT-NEXT: .LBB11_22: -; LE-32BIT-NEXT: addi 3, 6, 0 -; LE-32BIT-NEXT: .LBB11_23: -; LE-32BIT-NEXT: cmplwi 3, 16, 0 -; LE-32BIT-NEXT: srw 25, 9, 30 -; LE-32BIT-NEXT: or 25, 25, 0 -; LE-32BIT-NEXT: srw 0, 7, 19 -; LE-32BIT-NEXT: or 26, 0, 26 -; 
LE-32BIT-NEXT: srw 0, 7, 30 -; LE-32BIT-NEXT: or 11, 0, 23 -; LE-32BIT-NEXT: bc 12, 28, .LBB11_25 -; LE-32BIT-NEXT: # %bb.24: -; LE-32BIT-NEXT: ori 0, 14, 0 -; LE-32BIT-NEXT: b .LBB11_26 -; LE-32BIT-NEXT: .LBB11_25: -; LE-32BIT-NEXT: addi 0, 28, 0 -; LE-32BIT-NEXT: .LBB11_26: -; LE-32BIT-NEXT: slw 28, 6, 16 -; LE-32BIT-NEXT: stw 0, 4(4) -; LE-32BIT-NEXT: slw 0, 29, 16 -; LE-32BIT-NEXT: lwz 4, 52(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 5, 0, 5 -; LE-32BIT-NEXT: lwz 0, 56(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: cmplwi 7, 16, 64 -; LE-32BIT-NEXT: slw 23, 6, 15 -; LE-32BIT-NEXT: srw 22, 17, 4 -; LE-32BIT-NEXT: li 15, 0 -; LE-32BIT-NEXT: sraw 21, 12, 0 -; LE-32BIT-NEXT: bc 12, 28, .LBB11_28 -; LE-32BIT-NEXT: # %bb.27: -; LE-32BIT-NEXT: ori 0, 15, 0 -; LE-32BIT-NEXT: b .LBB11_29 -; LE-32BIT-NEXT: .LBB11_28: -; LE-32BIT-NEXT: addi 0, 28, 0 -; LE-32BIT-NEXT: .LBB11_29: -; LE-32BIT-NEXT: bc 12, 20, .LBB11_31 -; LE-32BIT-NEXT: # %bb.30: -; LE-32BIT-NEXT: ori 28, 22, 0 -; LE-32BIT-NEXT: b .LBB11_32 -; LE-32BIT-NEXT: .LBB11_31: -; LE-32BIT-NEXT: addi 28, 25, 0 -; LE-32BIT-NEXT: .LBB11_32: -; LE-32BIT-NEXT: bc 12, 2, .LBB11_34 -; LE-32BIT-NEXT: # %bb.33: -; LE-32BIT-NEXT: ori 22, 24, 0 -; LE-32BIT-NEXT: b .LBB11_35 -; LE-32BIT-NEXT: .LBB11_34: -; LE-32BIT-NEXT: addi 22, 6, 0 -; LE-32BIT-NEXT: .LBB11_35: -; LE-32BIT-NEXT: lwz 6, 48(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: sraw 20, 12, 4 -; LE-32BIT-NEXT: lwz 16, 60(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: bc 12, 20, .LBB11_37 -; LE-32BIT-NEXT: # %bb.36: -; LE-32BIT-NEXT: ori 4, 20, 0 -; LE-32BIT-NEXT: b .LBB11_38 -; LE-32BIT-NEXT: .LBB11_37: -; LE-32BIT-NEXT: addi 4, 11, 0 -; LE-32BIT-NEXT: .LBB11_38: -; LE-32BIT-NEXT: srw 30, 17, 30 -; LE-32BIT-NEXT: bc 12, 20, .LBB11_40 -; LE-32BIT-NEXT: # %bb.39: -; LE-32BIT-NEXT: ori 25, 15, 0 -; LE-32BIT-NEXT: b .LBB11_41 -; LE-32BIT-NEXT: .LBB11_40: -; LE-32BIT-NEXT: addi 25, 6, 0 -; LE-32BIT-NEXT: .LBB11_41: -; LE-32BIT-NEXT: lwz 6, 44(1) # 4-byte Folded Reload -; 
LE-32BIT-NEXT: bc 12, 28, .LBB11_43 -; LE-32BIT-NEXT: # %bb.42: -; LE-32BIT-NEXT: ori 8, 27, 0 -; LE-32BIT-NEXT: ori 5, 23, 0 -; LE-32BIT-NEXT: b .LBB11_43 -; LE-32BIT-NEXT: .LBB11_43: -; LE-32BIT-NEXT: bc 12, 2, .LBB11_44 -; LE-32BIT-NEXT: b .LBB11_45 -; LE-32BIT-NEXT: .LBB11_44: -; LE-32BIT-NEXT: addi 4, 7, 0 -; LE-32BIT-NEXT: .LBB11_45: -; LE-32BIT-NEXT: sraw 19, 12, 19 -; LE-32BIT-NEXT: bc 12, 2, .LBB11_46 -; LE-32BIT-NEXT: b .LBB11_47 -; LE-32BIT-NEXT: .LBB11_46: -; LE-32BIT-NEXT: addi 10, 16, 0 -; LE-32BIT-NEXT: .LBB11_47: -; LE-32BIT-NEXT: bc 12, 24, .LBB11_49 -; LE-32BIT-NEXT: # %bb.48: -; LE-32BIT-NEXT: ori 26, 21, 0 -; LE-32BIT-NEXT: b .LBB11_49 -; LE-32BIT-NEXT: .LBB11_49: -; LE-32BIT-NEXT: bc 12, 14, .LBB11_50 -; LE-32BIT-NEXT: b .LBB11_51 -; LE-32BIT-NEXT: .LBB11_50: -; LE-32BIT-NEXT: addi 5, 29, 0 -; LE-32BIT-NEXT: .LBB11_51: -; LE-32BIT-NEXT: bc 12, 4, .LBB11_53 -; LE-32BIT-NEXT: # %bb.52: -; LE-32BIT-NEXT: ori 4, 14, 0 -; LE-32BIT-NEXT: b .LBB11_53 -; LE-32BIT-NEXT: .LBB11_53: -; LE-32BIT-NEXT: or 10, 10, 0 -; LE-32BIT-NEXT: bc 12, 24, .LBB11_55 -; LE-32BIT-NEXT: # %bb.54: -; LE-32BIT-NEXT: ori 24, 14, 0 -; LE-32BIT-NEXT: b .LBB11_56 -; LE-32BIT-NEXT: .LBB11_55: -; LE-32BIT-NEXT: addi 24, 6, 0 -; LE-32BIT-NEXT: .LBB11_56: -; LE-32BIT-NEXT: lwz 6, 16(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: bc 12, 4, .LBB11_57 -; LE-32BIT-NEXT: b .LBB11_58 -; LE-32BIT-NEXT: .LBB11_57: -; LE-32BIT-NEXT: addi 3, 10, 0 -; LE-32BIT-NEXT: .LBB11_58: -; LE-32BIT-NEXT: bc 12, 20, .LBB11_60 -; LE-32BIT-NEXT: # %bb.59: -; LE-32BIT-NEXT: ori 0, 15, 0 -; LE-32BIT-NEXT: b .LBB11_61 -; LE-32BIT-NEXT: .LBB11_60: -; LE-32BIT-NEXT: addi 0, 30, 0 -; LE-32BIT-NEXT: .LBB11_61: -; LE-32BIT-NEXT: bc 12, 24, .LBB11_63 -; LE-32BIT-NEXT: # %bb.62: -; LE-32BIT-NEXT: ori 30, 14, 0 -; LE-32BIT-NEXT: b .LBB11_64 -; LE-32BIT-NEXT: .LBB11_63: -; LE-32BIT-NEXT: addi 30, 19, 0 -; LE-32BIT-NEXT: .LBB11_64: -; LE-32BIT-NEXT: bc 12, 2, .LBB11_65 -; LE-32BIT-NEXT: b .LBB11_66 -; LE-32BIT-NEXT: 
.LBB11_65: -; LE-32BIT-NEXT: addi 3, 16, 0 -; LE-32BIT-NEXT: .LBB11_66: -; LE-32BIT-NEXT: stw 4, 8(18) -; LE-32BIT-NEXT: bc 12, 28, .LBB11_68 -; LE-32BIT-NEXT: # %bb.67: -; LE-32BIT-NEXT: ori 27, 15, 0 -; LE-32BIT-NEXT: b .LBB11_69 -; LE-32BIT-NEXT: .LBB11_68: -; LE-32BIT-NEXT: addi 27, 6, 0 -; LE-32BIT-NEXT: .LBB11_69: -; LE-32BIT-NEXT: bc 12, 14, .LBB11_71 -; LE-32BIT-NEXT: # %bb.70: -; LE-32BIT-NEXT: ori 6, 8, 0 -; LE-32BIT-NEXT: b .LBB11_72 -; LE-32BIT-NEXT: .LBB11_71: -; LE-32BIT-NEXT: addi 6, 12, 0 -; LE-32BIT-NEXT: .LBB11_72: -; LE-32BIT-NEXT: bc 12, 2, .LBB11_74 -; LE-32BIT-NEXT: # %bb.73: -; LE-32BIT-NEXT: ori 8, 28, 0 -; LE-32BIT-NEXT: b .LBB11_75 -; LE-32BIT-NEXT: .LBB11_74: -; LE-32BIT-NEXT: addi 8, 9, 0 -; LE-32BIT-NEXT: .LBB11_75: -; LE-32BIT-NEXT: bc 12, 10, .LBB11_77 -; LE-32BIT-NEXT: # %bb.76: -; LE-32BIT-NEXT: ori 28, 26, 0 -; LE-32BIT-NEXT: b .LBB11_78 -; LE-32BIT-NEXT: .LBB11_77: -; LE-32BIT-NEXT: addi 28, 7, 0 -; LE-32BIT-NEXT: .LBB11_78: -; LE-32BIT-NEXT: stw 3, 28(18) -; LE-32BIT-NEXT: or 7, 8, 27 -; LE-32BIT-NEXT: or 4, 0, 6 -; LE-32BIT-NEXT: or 3, 25, 5 -; LE-32BIT-NEXT: bc 12, 4, .LBB11_80 -; LE-32BIT-NEXT: # %bb.79: -; LE-32BIT-NEXT: ori 6, 28, 0 -; LE-32BIT-NEXT: ori 4, 30, 0 -; LE-32BIT-NEXT: ori 3, 24, 0 -; LE-32BIT-NEXT: ori 12, 14, 0 -; LE-32BIT-NEXT: b .LBB11_81 -; LE-32BIT-NEXT: .LBB11_80: -; LE-32BIT-NEXT: addi 6, 7, 0 -; LE-32BIT-NEXT: addi 12, 22, 0 -; LE-32BIT-NEXT: .LBB11_81: -; LE-32BIT-NEXT: bc 12, 2, .LBB11_83 -; LE-32BIT-NEXT: # %bb.82: -; LE-32BIT-NEXT: ori 5, 6, 0 -; LE-32BIT-NEXT: b .LBB11_84 -; LE-32BIT-NEXT: .LBB11_83: -; LE-32BIT-NEXT: addi 5, 9, 0 -; LE-32BIT-NEXT: addi 4, 17, 0 -; LE-32BIT-NEXT: addi 3, 31, 0 -; LE-32BIT-NEXT: .LBB11_84: -; LE-32BIT-NEXT: stw 12, 12(18) -; LE-32BIT-NEXT: stw 5, 24(18) -; LE-32BIT-NEXT: stw 4, 16(18) -; LE-32BIT-NEXT: stw 3, 20(18) -; LE-32BIT-NEXT: lwz 12, 68(1) -; LE-32BIT-NEXT: lwz 31, 140(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: mtcrf 32, 12 # cr2 -; LE-32BIT-NEXT: mtcrf 16, 
12 # cr3 -; LE-32BIT-NEXT: lwz 30, 136(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 29, 132(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 28, 128(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 27, 124(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 26, 120(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 25, 116(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 24, 112(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 23, 108(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 22, 104(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 21, 100(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 20, 96(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 19, 92(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 18, 88(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 17, 84(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 16, 80(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 15, 76(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 14, 72(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: addi 1, 1, 144 +; LE-32BIT-NEXT: stw 0, 76(1) +; LE-32BIT-NEXT: sub 3, 7, 3 +; LE-32BIT-NEXT: stw 12, 64(1) +; LE-32BIT-NEXT: stw 11, 68(1) +; LE-32BIT-NEXT: stw 10, 56(1) +; LE-32BIT-NEXT: stw 9, 60(1) +; LE-32BIT-NEXT: stw 8, 48(1) +; LE-32BIT-NEXT: lwz 4, 4(3) +; LE-32BIT-NEXT: lwz 6, 0(3) +; LE-32BIT-NEXT: lwz 7, 12(3) +; LE-32BIT-NEXT: lwz 8, 8(3) +; LE-32BIT-NEXT: lwz 9, 20(3) +; LE-32BIT-NEXT: lwz 10, 16(3) +; LE-32BIT-NEXT: lwz 11, 24(3) +; LE-32BIT-NEXT: lwz 3, 28(3) +; LE-32BIT-NEXT: stw 11, 24(5) +; LE-32BIT-NEXT: stw 3, 28(5) +; LE-32BIT-NEXT: stw 10, 16(5) +; LE-32BIT-NEXT: stw 9, 20(5) +; LE-32BIT-NEXT: stw 8, 8(5) +; LE-32BIT-NEXT: stw 7, 12(5) +; LE-32BIT-NEXT: stw 6, 0(5) +; LE-32BIT-NEXT: stw 4, 4(5) +; LE-32BIT-NEXT: addi 1, 1, 80 ; LE-32BIT-NEXT: blr %src = load i256, ptr %src.ptr, align 1 %byteOff = load i256, ptr %byteOff.ptr, align 1 diff --git a/llvm/test/CodeGen/X86/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/X86/wide-scalar-shift-legalization.ll --- 
a/llvm/test/CodeGen/X86/wide-scalar-shift-legalization.ll +++ b/llvm/test/CodeGen/X86/wide-scalar-shift-legalization.ll @@ -622,420 +622,47 @@ ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, (%rdx) ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq ; -; X32-NO-BMI2-NO-SHLD-LABEL: lshr_16bytes: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $32, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%ecx), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%ecx), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shlb $3, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: notb %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%eax), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: 
movl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: negb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), 
%eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%eax), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel (%esp), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 12(%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 8(%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 4(%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $32, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; 
X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: lshr_16bytes: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $32, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%ecx), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shlb $3, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, 
%esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ah -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ah -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%ecx), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 
4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 8(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 12(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 4(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $32, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: lshr_16bytes: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $36, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%ecx), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%ecx), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlb $3, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esi), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%ecx), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 
4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl (%esp), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 12(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, 8(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 4(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $36, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: lshr_16bytes: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $32, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %edx -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlb $3, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%ebp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%eax), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl (%esp), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: 
movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 8(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 12(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, 4(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $32, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: lshr_16bytes: +; X32: # %bb.0: +; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi +; X32-NEXT: subl $32, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movl (%edx), %esi +; X32-NEXT: movl 4(%edx), %edi +; X32-NEXT: movl 12(%edx), %ebx +; X32-NEXT: movl 8(%edx), %edx +; X32-NEXT: movzbl (%ecx), %ecx +; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %esi, (%esp) +; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X32-NEXT: andb $31, %cl +; X32-NEXT: movzbl %cl, %edx +; X32-NEXT: cmpb $16, %cl +; X32-NEXT: movl $16, %ecx +; X32-NEXT: cmovbl %edx, %ecx +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl (%esp,%ecx), %edx +; X32-NEXT: movl 4(%esp,%ecx), %esi +; X32-NEXT: movl 12(%esp,%ecx), %edi +; X32-NEXT: movl 8(%esp,%ecx), %ecx +; X32-NEXT: movl %ecx, 8(%eax) +; X32-NEXT: movl %edi, 12(%eax) +; X32-NEXT: movl %edx, (%eax) +; X32-NEXT: movl %esi, 4(%eax) +; X32-NEXT: addl $32, %esp +; X32-NEXT: popl %esi +; X32-NEXT: popl %edi +; X32-NEXT: popl %ebx +; X32-NEXT: retl %src = load i128, ptr %src.ptr, 
align 1 %byteOff = load i128, ptr %byteOff.ptr, align 1 %bitOff = shl i128 %byteOff, 3 @@ -1120,436 +747,48 @@ ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, (%rdx) ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq ; -; X32-NO-BMI2-NO-SHLD-LABEL: shl_16bytes: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $36, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esi), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%esi), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shlb $3, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: notb %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%edi), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: negb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi -; 
X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%eax), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb %cl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, (%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 4(%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, 12(%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 8(%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $36, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: shl_16bytes: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $32, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%ecx), %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%ecx), %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shlb $3, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ah -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ah -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 4(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, 8(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 12(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $32, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl 
%ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: shl_16bytes: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $36, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%ecx), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%ecx), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlb $3, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edi, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%ebx), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebx -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 
4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%ecx), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl (%esp), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 4(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, 12(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, 
8(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $36, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: shl_16bytes: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $32, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%ecx), %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%ecx), %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlb $3, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%ebp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%eax), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: 
testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl (%esp), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 4(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, 8(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 12(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $32, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: shl_16bytes: +; X32: # %bb.0: +; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi +; X32-NEXT: subl $32, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movl (%edx), %esi +; X32-NEXT: movl 4(%edx), %edi +; X32-NEXT: movl 12(%edx), %ebx +; X32-NEXT: movl 8(%edx), %edx +; X32-NEXT: movzbl (%ecx), %ecx +; X32-NEXT: movl %edx, 
{{[0-9]+}}(%esp) +; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X32-NEXT: andb $31, %cl +; X32-NEXT: movzbl %cl, %edx +; X32-NEXT: cmpb $16, %cl +; X32-NEXT: movl $16, %ecx +; X32-NEXT: cmovbl %edx, %ecx +; X32-NEXT: negb %cl +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, (%esp) +; X32-NEXT: movl 16(%esp,%ecx), %edx +; X32-NEXT: movl 20(%esp,%ecx), %esi +; X32-NEXT: movl 28(%esp,%ecx), %edi +; X32-NEXT: movl 24(%esp,%ecx), %ecx +; X32-NEXT: movl %ecx, 8(%eax) +; X32-NEXT: movl %edi, 12(%eax) +; X32-NEXT: movl %edx, (%eax) +; X32-NEXT: movl %esi, 4(%eax) +; X32-NEXT: addl $32, %esp +; X32-NEXT: popl %esi +; X32-NEXT: popl %edi +; X32-NEXT: popl %ebx +; X32-NEXT: retl %src = load i128, ptr %src.ptr, align 1 %byteOff = load i128, ptr %byteOff.ptr, align 1 %bitOff = shl i128 %byteOff, 3 @@ -1634,433 +873,48 @@ ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, (%rdx) ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq ; -; X32-NO-BMI2-NO-SHLD-LABEL: ashr_16bytes: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $36, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%edx), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%edx), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%eax), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shlb $3, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), 
%ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%edi), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: sarl $31, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: negb %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; 
X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%eax), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: 
shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 12(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 8(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, 4(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $36, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: ashr_16bytes: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $32, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%ecx), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 
8(%ecx), %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shlb $3, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl $31, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ah -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ah -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%ecx), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: 
movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 8(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 12(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 4(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $32, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: ashr_16bytes: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $44, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%edx), %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%edx), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlb $3, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: sarl $31, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb 
$32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %ebx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esi), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl (%esp), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %cl 
-; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %ecx, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%edx), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel (%esp), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 12(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 8(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 4(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $44, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: ashr_16bytes: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $36, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlb $3, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarl $31, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %edx, %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%eax), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl (%esp), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %ebx, %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%eax), %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel (%esp), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, 8(%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, 12(%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, 4(%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $36, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: ashr_16bytes: +; X32: # %bb.0: +; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi +; X32-NEXT: subl $32, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movl (%edx), %esi +; X32-NEXT: movl 4(%edx), %edi +; X32-NEXT: movl 12(%edx), %ebx +; X32-NEXT: movl 8(%edx), %edx +; X32-NEXT: movzbl (%ecx), %ecx +; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %esi, (%esp) +; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X32-NEXT: sarl $31, %ebx +; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-NEXT: andb $31, %cl +; X32-NEXT: movzbl %cl, %edx +; X32-NEXT: cmpb $16, %cl +; X32-NEXT: movl $16, %ecx +; X32-NEXT: cmovbl %edx, %ecx +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl (%esp,%ecx), %edx +; X32-NEXT: movl 4(%esp,%ecx), %esi +; X32-NEXT: movl 12(%esp,%ecx), %edi +; X32-NEXT: movl 8(%esp,%ecx), %ecx +; X32-NEXT: movl 
%ecx, 8(%eax) +; X32-NEXT: movl %edi, 12(%eax) +; X32-NEXT: movl %edx, (%eax) +; X32-NEXT: movl %esi, 4(%eax) +; X32-NEXT: addl $32, %esp +; X32-NEXT: popl %esi +; X32-NEXT: popl %edi +; X32-NEXT: popl %ebx +; X32-NEXT: retl %src = load i128, ptr %src.ptr, align 1 %byteOff = load i128, ptr %byteOff.ptr, align 1 %bitOff = shl i128 %byteOff, 3 @@ -2070,1948 +924,98 @@ } define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: lshr_32bytes: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movq (%rdi), %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NO-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %eax -; X64-NO-BMI2-NO-SHLD-NEXT: shlb $3, %al -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r10, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %r11d, %r11d -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rbp, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r10,%r10), %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: notb %sil -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdx, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; 
X64-NO-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r14, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r8,%r8), %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r12, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbp, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: subb %al, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r12, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r12, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r13, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r15, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rax), %esi -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdx, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r8, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: testb %al, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %r12, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 
8-byte Folded Reload -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %rbp, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r10, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %r11, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %r11, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rbx, 24(%r9) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, 16(%r9) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, (%r9) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, 8(%r9) -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: lshr_32bytes: +; X64: # %bb.0: +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq 8(%rdi), %rcx +; X64-NEXT: movq 24(%rdi), %r8 +; X64-NEXT: movq 16(%rdi), %rdi +; X64-NEXT: movzbl (%rsi), %esi +; X64-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %r8, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: andl $31, %esi +; X64-NEXT: movq -64(%rsp,%rsi), %rax +; X64-NEXT: movq -56(%rsp,%rsi), %rcx +; X64-NEXT: movq -40(%rsp,%rsi), %rdi +; X64-NEXT: movq -48(%rsp,%rsi), %rsi +; X64-NEXT: movq %rsi, 16(%rdx) +; X64-NEXT: movq %rdi, 24(%rdx) +; X64-NEXT: movq %rax, (%rdx) +; X64-NEXT: movq %rcx, 8(%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: lshr_32bytes: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 16(%rdi), %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: 
movq 24(%rdi), %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlb $3, %al -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r10, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r14, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r12, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %al, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r8, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r15, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r14, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rax), %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rdi, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r13, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r12, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %r8 -; 
X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rbx, 16(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r11, 24(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, 8(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: lshr_32bytes: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq (%rdi), %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %r9d -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlb $3, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %r10, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %esi, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r15, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %rax, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r14, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %rcx, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r9d, %r13d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %r12, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbx, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r14, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, 
%rbx, %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r10,%r10), %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %r14, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbp, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r15, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %r9b, %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r13d, %r15d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r15b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rbx, %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrq %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r15, %rbp, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %r10, %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbp, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %rbx, %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r13, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r12, %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r11, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%r9), %r11d -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r11d, %r12d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r12b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r12, %r14, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r11, %rbx, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbx, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r11, %r10, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r11b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r10, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %r9b, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %r13, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rcx, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %r15, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %rsi, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %rsi, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r8, 24(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, 16(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r14, (%rdx) -; 
X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r10, 8(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: lshr_32bytes: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 16(%rdi), %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 24(%rdi), %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %eax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlb $3, %al -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r9, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rax, %rdi, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r15, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rax, %r11, %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r12, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r11, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %al, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r8, %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rcx, %r8, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: 
testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r13, %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r14, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r15, %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rax), %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r11, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %r11, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r11, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %r12, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %r13, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rbx, 16(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r10, 24(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, 8(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: lshr_32bytes: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $136, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%ecx), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl 28(%ecx), %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movb (%eax), %ch -; X32-NO-BMI2-NO-SHLD-NEXT: shlb $3, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl 20(%ebx), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%ebx), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl 24(%ebx), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%esi), %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: addl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-128, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, 
%cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: negb %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl 
%ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: addb 
$64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 
4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl 16(%eax), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: notb %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movb 
{{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%eax), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, %al -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: addl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl 
{{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-128, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; 
X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: subb %bl, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; 
X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: negb %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; 
X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload 
-; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; 
X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ah # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnsl %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, 
%ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 28(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 24(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 16(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 20(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 12(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 4(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 8(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $136, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: lshr_32bytes: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $120, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 28(%ecx), %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb (%eax), %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: 
shlb $3, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %eax, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %eax, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 20(%ebp), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%ebp), %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%edx), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl 24(%ebp), %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-128, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte 
Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 16(%esi), %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 
{{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%eax), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, %dl -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl 
%esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnsl %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, 
%cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 24(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 28(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 16(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 20(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, (%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 8(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 12(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $120, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: lshr_32bytes: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $160, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), 
%esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 20(%esi), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esi), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlb $3, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%esi), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 28(%esi), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 24(%ecx), 
%ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%eax), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-128, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %eax -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $bl killed $bl killed $ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebp, %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebp, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 
(%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, 
{{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 16(%esi), %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload 
-; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%eax), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebp, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte 
Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, (%esp) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel 
%ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %cl, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %cl, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebp, %edi -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %cl -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel 
{{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $0, (%esp) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $0, (%esp) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnsl %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel 
{{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 28(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 24(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 16(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 20(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, 12(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 4(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 8(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $160, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: lshr_32bytes: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $128, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 28(%ecx), %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 24(%ecx), %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlb $3, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%edx), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 20(%edx), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-128, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edi, %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl (%esp), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 16(%eax), %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%eax), %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %esi, %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %dl, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %ebp, 
%ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload 
-; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 24(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 28(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 16(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 20(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, (%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 8(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, 12(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $128, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: lshr_32bytes: +; X32: # %bb.0: +; X32-NEXT: pushl %ebp +; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi +; X32-NEXT: subl $72, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl (%eax), %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl 4(%eax), %ecx +; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NEXT: movl 12(%eax), %esi +; X32-NEXT: movl 8(%eax), %edi +; X32-NEXT: movl 20(%eax), %ebx +; X32-NEXT: movl 16(%eax), %ebp +; X32-NEXT: movl 28(%eax), %edx +; X32-NEXT: movl 24(%eax), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movzbl (%eax), %eax +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) +; 
X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: andl $31, %eax +; X32-NEXT: movl 8(%esp,%eax), %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl 12(%esp,%eax), %ecx +; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NEXT: movl 20(%esp,%eax), %esi +; X32-NEXT: movl 16(%esp,%eax), %edi +; X32-NEXT: movl 28(%esp,%eax), %ebx +; X32-NEXT: movl 24(%esp,%eax), %ebp +; X32-NEXT: movl 36(%esp,%eax), %edx +; X32-NEXT: movl 32(%esp,%eax), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %ecx, 24(%eax) +; X32-NEXT: movl %edx, 28(%eax) +; X32-NEXT: movl %ebp, 16(%eax) +; X32-NEXT: movl %ebx, 20(%eax) +; X32-NEXT: movl %edi, 8(%eax) +; X32-NEXT: movl %esi, 12(%eax) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, (%eax) +; X32-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, 4(%eax) +; X32-NEXT: addl $72, %esp +; X32-NEXT: popl %esi +; X32-NEXT: popl %edi +; X32-NEXT: popl %ebx +; X32-NEXT: popl %ebp +; X32-NEXT: retl %src = load i256, ptr %src.ptr, align 1 %byteOff = load i256, ptr %byteOff.ptr, align 1 %bitOff = shl i256 %byteOff, 3 @@ -4020,2001 +1024,102 @@ ret void } define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: shl_32bytes: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: 
pushq %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NO-BMI2-NO-SHLD-NEXT: movq (%rdi), %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %eax -; X64-NO-BMI2-NO-SHLD-NEXT: shlb $3, %al -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %r10d, %r10d -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rbp, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r10, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rbx, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r10, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: notb %sil -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r14, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbx, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; 
X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbp, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: subb %al, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdx,%rdx), %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbx, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r10, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r13, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r12, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rax), %esi -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdx, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r8, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r10, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: testb %al, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %rbx, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %rbp, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %r10, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %r10, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r11, (%r9) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r15, 8(%r9) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, 24(%r9) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, 16(%r9) -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r13 -; 
X64-NO-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: shl_32bytes: +; X64: # %bb.0: +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq 8(%rdi), %rcx +; X64-NEXT: movq 24(%rdi), %r8 +; X64-NEXT: movq 16(%rdi), %rdi +; X64-NEXT: movzbl (%rsi), %esi +; X64-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %r8, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: andb $31, %sil +; X64-NEXT: negb %sil +; X64-NEXT: movzbl %sil, %eax +; X64-NEXT: movq -32(%rsp,%rax), %rcx +; X64-NEXT: movq -24(%rsp,%rax), %rsi +; X64-NEXT: movq -8(%rsp,%rax), %rdi +; X64-NEXT: movq -16(%rsp,%rax), %rax +; X64-NEXT: movq %rax, 16(%rdx) +; X64-NEXT: movq %rdi, 24(%rdx) +; X64-NEXT: movq %rcx, (%rdx) +; X64-NEXT: movq %rsi, 8(%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: shl_32bytes: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbp -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 16(%rdi), %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 24(%rdi), %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlb $3, %al -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %al, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r13 -; 
X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r8, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %rbp -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %rbp -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r14, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rbp, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r10, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rbp, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r13, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r12, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rax), %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r10, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r11, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r14, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rbx, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r15, 8(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r12, 16(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, 24(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx -; 
X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbp -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: shl_32bytes: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq (%rdi), %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %r8d -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlb $3, %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %r9, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %esi, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r15, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %rax, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r11, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %rcx, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r8d, %r13d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rax, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrq %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r13, %r12, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r14, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %r10, %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r9, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r13, %r14, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbp, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r8b -; 
X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r15, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %r8b, %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r13d, %r15d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r15b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r10,%r10), %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r15, %rbp, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r13, %r9, %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbp, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r13, %r10, %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r13, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r12, %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbx, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%r8), %ebx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %r12d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r12b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r12, %r14, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbx, %r10, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r10, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbx, %r9, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %bl -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r9, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %r8b, %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %r13, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rcx, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %r15, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %rsi, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %rsi, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r11, 8(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r14, 24(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r9, 16(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r15 -; 
X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: shl_32bytes: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %rbp -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 16(%rdi), %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 24(%rdi), %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %eax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlb $3, %al -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %al, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r10, %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rdi, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r13, %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r9, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r8, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rax, %r8, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rbx, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rax, %r10, %rbp -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rbp, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r10, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rbp, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r12, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r13, 
%r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rax), %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r10, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rcx, %r10, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r10, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %r14, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %rbx, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r15, 8(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r10, 16(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, 24(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %rbp -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: shl_32bytes: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $140, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl 16(%ecx), %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%ecx), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shlb $3, %cl -; 
X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%ebx), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl 24(%ebx), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%ebx), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: notb %ah 
-; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %al -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl 20(%edi), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, (%esp) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-128, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%edi), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; 
X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb (%esp), %ah # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %al -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, 
%edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: negb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %edi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: 
movl %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%esp), %edx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), 
%edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: notb %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-128, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: subb %ch, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: negb %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ecx -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl 28(%esi), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ebx -; 
X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ah # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %edi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, 
%esi -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb (%esp), %ah # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: addb $64, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl 
%esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; 
X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-64, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%esp), %edx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: subb %dl, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb (%esp), %al # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnsl %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), 
%esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ebp, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 4(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 12(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 8(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, 28(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 24(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 16(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 20(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $140, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: shl_32bytes: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $116, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 
{{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 16(%edx), %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%edx), %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shlb $3, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%edx), %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 24(%edx), %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%edx), %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 20(%edx), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-128, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %bh -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bh, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %bh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%esi), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: 
testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 28(%esi), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bl, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb 
%bl, %bh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 
4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 
4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 4(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 8(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 12(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, 24(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 28(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, 16(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 20(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $116, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: shl_32bytes: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $164, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%edx), %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 24(%edx), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlb $3, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 16(%edx), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%edx), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: 
movl 4(%esi), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 20(%ecx), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-128, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%esi), %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %dl -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %eax -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edi, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 
(%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %cl, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl (%esp), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl 28(%eax), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edi, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $bl killed $bl killed $ebx def $ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 
4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 
4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnsl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 4(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 12(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 8(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 28(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, 24(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 16(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, 20(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $164, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: shl_32bytes: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $128, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 24(%ecx), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlb $3, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 16(%ebp), %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%ebp), %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi 
-; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%ebx), %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 20(%edx), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-128, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael 
%ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebp, %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $al killed $al killed $eax def $eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %ebp, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), 
%ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%esi), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: 
testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 28(%esi), %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte 
Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %al, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %bl, %ch -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %ch -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %ch -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %ch, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: 
movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %esi, %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %eax -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, (%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, 4(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, 8(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, 12(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 24(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, 28(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, 16(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 20(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $128, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: shl_32bytes: +; X32: # %bb.0: +; X32-NEXT: pushl %ebp +; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi +; X32-NEXT: subl $72, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: movl (%esi), %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl 4(%esi), %ecx +; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NEXT: movl 12(%esi), %edi +; X32-NEXT: movl 8(%esi), %ebx +; X32-NEXT: movl 20(%esi), %ebp +; X32-NEXT: movzbl (%eax), %eax +; X32-NEXT: movl 16(%esi), %edx +; X32-NEXT: movl 28(%esi), %ecx +; X32-NEXT: movl 24(%esi), %esi +; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, 
{{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: andb $31, %al +; X32-NEXT: negb %al +; X32-NEXT: movzbl %al, %eax +; X32-NEXT: movl 40(%esp,%eax), %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl 44(%esp,%eax), %ecx +; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NEXT: movl 52(%esp,%eax), %esi +; X32-NEXT: movl 48(%esp,%eax), %edi +; X32-NEXT: movl 60(%esp,%eax), %ebx +; X32-NEXT: movl 56(%esp,%eax), %ebp +; X32-NEXT: movl 68(%esp,%eax), %edx +; X32-NEXT: movl 64(%esp,%eax), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %ecx, 24(%eax) +; X32-NEXT: movl %edx, 28(%eax) +; X32-NEXT: movl %ebp, 16(%eax) +; X32-NEXT: movl %ebx, 20(%eax) +; X32-NEXT: movl %edi, 8(%eax) +; X32-NEXT: movl %esi, 12(%eax) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, (%eax) +; X32-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, 4(%eax) +; X32-NEXT: addl $72, %esp +; X32-NEXT: popl %esi +; X32-NEXT: popl %edi +; X32-NEXT: popl %ebx +; X32-NEXT: popl %ebp +; X32-NEXT: retl %src = load i256, ptr %src.ptr, align 1 %byteOff = load i256, ptr %byteOff.ptr, align 1 %bitOff = shl i256 %byteOff, 3 @@ -6023,1996 +1128,100 @@ ret void } define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: ashr_32bytes: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movq (%rdi), %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, 
{{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NO-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %eax -; X64-NO-BMI2-NO-SHLD-NEXT: shlb $3, %al -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r12, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rcx, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: sarq %cl, %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: sarq $63, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r12,%r12), %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: notb %sil -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r9, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r14, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r8,%r8), %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r13, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rdx, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: subb %al, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rdx -; 
X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdx, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rdx, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rcx, %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbp, %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r15, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rax), %esi -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r9, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: sarq %cl, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r8, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: testb %al, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %rdx, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %r13, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r12, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %r11, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %r11, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rbx, 24(%r10) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, 16(%r10) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, (%r10) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, 8(%r10) -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: ashr_32bytes: +; X64: # %bb.0: +; X64-NEXT: movq 
(%rdi), %rax +; X64-NEXT: movq 8(%rdi), %rcx +; X64-NEXT: movq 24(%rdi), %r8 +; X64-NEXT: movq 16(%rdi), %rdi +; X64-NEXT: movzbl (%rsi), %esi +; X64-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %r8, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) +; X64-NEXT: sarq $63, %r8 +; X64-NEXT: movq %r8, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %r8, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %r8, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %r8, -{{[0-9]+}}(%rsp) +; X64-NEXT: andl $31, %esi +; X64-NEXT: movq -64(%rsp,%rsi), %rax +; X64-NEXT: movq -56(%rsp,%rsi), %rcx +; X64-NEXT: movq -40(%rsp,%rsi), %rdi +; X64-NEXT: movq -48(%rsp,%rsi), %rsi +; X64-NEXT: movq %rsi, 16(%rdx) +; X64-NEXT: movq %rdi, 24(%rdx) +; X64-NEXT: movq %rax, (%rdx) +; X64-NEXT: movq %rcx, 8(%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: ashr_32bytes: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbp -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 16(%rdi), %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 24(%rdi), %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlb $3, %al -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: sarq %cl, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r10, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %rsi -; X64-NO-BMI2-HAVE-SHLD-NEXT: sarq $63, %rsi -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; 
X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r14, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r12, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rbp, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %al, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r8, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rbp, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r15, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r14, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rax), %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: sarq %cl, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rdi, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r13, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r12, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rbx, 16(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r11, 24(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, 8(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbp -; 
X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: ashr_32bytes: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq (%rdi), %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %r8d -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlb $3, %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r8, %rcx, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %eax, %eax -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r15, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rax, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r10, %rsi -; X64-HAVE-BMI2-NO-SHLD-NEXT: sarq $63, %rsi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: sarxq %r8, %r10, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r12, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r8, %rbx, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r8d, %ebp -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %bpl -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rcx,%rcx), %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbp, %r13, %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r14, %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r15, %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r8, %r14, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r10,%r10), %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbp, %r15, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rax, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r12, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %al -; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %r8b, %al -; 
X64-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %r12d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r12b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r14, %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrq %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r12, %rbp, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rax, %r10, %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbp, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rax, %r14, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rax, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rbp, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r13, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r11, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%r8), %r11d -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r11d, %r13d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %r15, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r11, %r14, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r14, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: sarxq %r11, %r10, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r11b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r10, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %r8b, %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %rax, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %r12, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rcx, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %rsi, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %rsi, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r9, 24(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, 16(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r15, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r10, 8(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; 
X64-HAVE-BMI2-HAVE-SHLD-LABEL: ashr_32bytes: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %rbp -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 16(%rdi), %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 24(%rdi), %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %eax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlb $3, %al -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r9, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rax, %rdi, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, %rsi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: sarq $63, %rsi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r12d, %r12d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r15, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: sarxq %rax, %r11, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r13, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r11, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r13, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %al, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r8, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rcx, %r8, %rbp -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rbp, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %rbp -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r14, %rbp -; 
X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r15, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rax), %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r11, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: sarxq %rcx, %r11, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r11, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %r13, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %rbp, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rbx, 16(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r10, 24(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, 8(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %rbp -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: ashr_32bytes: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $144, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%ecx), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl 28(%ecx), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movb (%eax), %ch -; X32-NO-BMI2-NO-SHLD-NEXT: shlb 
$3, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: sarl $31, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ebx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl 20(%ebp), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%ebp), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl 24(%ebp), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %ebp -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%esi), %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: addl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-128, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 
-; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: negb %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, 
{{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: addb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, 
{{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: 
movl $0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl 16(%eax), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ah # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: notb %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, 
%al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%ecx), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: addl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb 
{{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-128, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, 
%ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: notb %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ah # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: subb %ah, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: negb %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bh, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte 
Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: 
shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ah # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnsl %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah -; 
X32-NO-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 28(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 24(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 16(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 20(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 12(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 4(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 8(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $144, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: ashr_32bytes: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi 
-; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $124, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 28(%ecx), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb (%eax), %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: shlb $3, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl $31, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 20(%edx), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: 
movl 4(%edx), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%edx), %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 24(%edx), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-128, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl 
-; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 16(%eax), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: 
testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%eax), %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi 
# 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: 
movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 
1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl 
%ebp, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ecx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dh # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnsl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %dl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %dh, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %dh, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 24(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 28(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 16(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 20(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 
%edx, (%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 8(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, 12(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $124, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: ashr_32bytes: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $168, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 28(%edx), %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%edx), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlb $3, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: sarl $31, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 20(%edx), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 
%eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%edx), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %ebx, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 24(%ecx), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%eax), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %esi -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-128, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), 
%ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebp, %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebp, %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %ecx, %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edi, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebp, %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebp, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $bl killed $bl killed $ebx def $ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %esi, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edi, %ebp 
-; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 16(%edi), %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: 
orl %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%eax), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebp, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edx -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %cl, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %cl, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, 
%edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload 
-; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $bl killed $bl def $ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) 
# 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl 
%ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnsl %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 28(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 24(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 16(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 20(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, 12(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 4(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 8(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $168, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: ashr_32bytes: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $132, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 28(%ecx), %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 24(%ecx), %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlb $3, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarl $31, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %ebx, %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%ebp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 20(%ebp), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-128, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %eax, %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %ebx, %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edi, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 16(%eax), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%eax), %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 
-; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %cl, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: 
movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %ecx, %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 
-; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %dl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte 
Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 
4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 24(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 28(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 16(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 20(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 8(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, 12(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $132, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: ashr_32bytes: +; X32: # %bb.0: +; X32-NEXT: pushl %ebp +; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi +; X32-NEXT: subl $72, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl (%eax), %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl 4(%eax), %ecx +; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NEXT: movl 12(%eax), %edi +; X32-NEXT: movl 8(%eax), %ebx +; X32-NEXT: movl 20(%eax), %ebp +; X32-NEXT: movl 16(%eax), %esi +; X32-NEXT: movl 28(%eax), %ecx +; X32-NEXT: movl 24(%eax), %edx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movzbl (%eax), %eax +; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NEXT: movl (%esp), %edx # 4-byte Reload +; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NEXT: sarl $31, %ecx +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: andl $31, %eax +; X32-NEXT: movl 8(%esp,%eax), %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl 12(%esp,%eax), %ecx +; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NEXT: movl 20(%esp,%eax), %esi +; X32-NEXT: movl 16(%esp,%eax), %edi +; X32-NEXT: movl 28(%esp,%eax), %ebx +; X32-NEXT: movl 24(%esp,%eax), %ebp +; X32-NEXT: movl 36(%esp,%eax), %edx +; X32-NEXT: movl 32(%esp,%eax), %ecx +; X32-NEXT: movl 
{{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %ecx, 24(%eax) +; X32-NEXT: movl %edx, 28(%eax) +; X32-NEXT: movl %ebp, 16(%eax) +; X32-NEXT: movl %ebx, 20(%eax) +; X32-NEXT: movl %edi, 8(%eax) +; X32-NEXT: movl %esi, 12(%eax) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, (%eax) +; X32-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, 4(%eax) +; X32-NEXT: addl $72, %esp +; X32-NEXT: popl %esi +; X32-NEXT: popl %edi +; X32-NEXT: popl %ebx +; X32-NEXT: popl %ebp +; X32-NEXT: retl %src = load i256, ptr %src.ptr, align 1 %byteOff = load i256, ptr %byteOff.ptr, align 1 %bitOff = shl i256 %byteOff, 3 @@ -8022,9 +1231,7 @@ } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; ALL: {{.*}} -; X32: {{.*}} ; X32-NO-SHLD: {{.*}} ; X32-SHLD: {{.*}} -; X64: {{.*}} ; X64-NO-SHLD: {{.*}} ; X64-SHLD: {{.*}} diff --git a/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll b/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll --- a/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll +++ b/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll @@ -627,239 +627,35 @@ ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb %sil, (%rdx) ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq ; -; X32-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; 
X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: negb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; 
X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $4, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $4, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ecx -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, (%esp), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %cl, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $4, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl 
%edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %ebp, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %edi, %edi -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %bl, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_1byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X32: # %bb.0: +; X32-NEXT: subl $32, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: movzbl %cl, %edx +; X32-NEXT: cmpb $16, %cl +; X32-NEXT: movl $16, %ecx +; X32-NEXT: cmovbl %edx, %ecx +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movzbl (%esp,%ecx), %ecx +; X32-NEXT: movb %cl, (%eax) +; X32-NEXT: addl $32, %esp +; X32-NEXT: retl %init = load <8 x i8>, ptr %src, align 1 %intermediate.sroa.0.0.vec.expand = shufflevector <8 x i8> %init, <8 x i8> poison, <16 x i32> %intermediate.sroa.0.0.vecblend = shufflevector <16 x i8> 
%intermediate.sroa.0.0.vec.expand, <16 x i8> , <16 x i32> @@ -944,240 +740,35 @@ ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movw %si, (%rdx) ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq ; -; X32-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx 
-; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: negb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movw %cx, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $4, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi 
-; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movw %bp, (%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, (%esp), %ecx # 4-byte 
Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movw %bp, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $4, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl 
%ecx, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movw %di, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $4, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_2byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X32: # %bb.0: +; X32-NEXT: subl $32, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm3 = 
xmm0[2,3,2,3] +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: movzbl %cl, %edx +; X32-NEXT: cmpb $16, %cl +; X32-NEXT: movl $16, %ecx +; X32-NEXT: cmovbl %edx, %ecx +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl (%esp,%ecx), %ecx +; X32-NEXT: movw %cx, (%eax) +; X32-NEXT: addl $32, %esp +; X32-NEXT: retl %init = load <8 x i8>, ptr %src, align 1 %intermediate.sroa.0.0.vec.expand = shufflevector <8 x i8> %init, <8 x i8> poison, <16 x i32> %intermediate.sroa.0.0.vecblend = shufflevector <16 x i8> %intermediate.sroa.0.0.vec.expand, <16 x i8> , <16 x i32> @@ -1261,240 +852,35 @@ ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%rdx) ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq ; -; X32-NO-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, 
%dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: negb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $4, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; 
X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ebp -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, (%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, 
%esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, (%esp), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $4, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %edi # 
4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $4, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_4byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X32: # %bb.0: +; X32-NEXT: subl $32, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: movzbl %cl, %edx +; X32-NEXT: cmpb $16, %cl +; X32-NEXT: movl $16, %ecx +; X32-NEXT: cmovbl %edx, %ecx +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl (%esp,%ecx), %ecx +; X32-NEXT: movl %ecx, (%eax) +; X32-NEXT: addl $32, %esp +; X32-NEXT: retl %init = load <8 x i8>, ptr %src, align 1 %intermediate.sroa.0.0.vec.expand = shufflevector <8 x i8> %init, <8 x i8> poison, <16 x i32> %intermediate.sroa.0.0.vecblend = shufflevector <16 x i8> %intermediate.sroa.0.0.vec.expand, <16 x i8> , <16 x i32> @@ -1578,355 +964,37 @@ ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, (%rdx) ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq ; -; X32-NO-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; 
X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $24, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, (%esp) # 4-byte Folded Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: negb %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl 
%edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: addl %edi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel (%esp), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 4(%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $24, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $24, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: 
subb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $24, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $28, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%eax) -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, 4(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $28, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $28, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, (%esp), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, 4(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $28, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_8byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X32: # %bb.0: +; X32-NEXT: subl $32, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: movzbl %cl, %edx +; X32-NEXT: cmpb $16, %cl +; X32-NEXT: movl $16, %ecx +; X32-NEXT: cmovbl %edx, %ecx +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl (%esp,%ecx), %edx +; X32-NEXT: movl 4(%esp,%ecx), %ecx +; X32-NEXT: movl %ecx, 4(%eax) +; X32-NEXT: movl %edx, (%eax) +; X32-NEXT: addl $32, %esp +; X32-NEXT: retl %init = load <8 x i8>, ptr %src, align 1 %intermediate.sroa.0.0.vec.expand = shufflevector <8 x i8> %init, <8 x i8> poison, <16 x i32> 
%intermediate.sroa.0.0.vecblend = shufflevector <16 x i8> %intermediate.sroa.0.0.vec.expand, <16 x i8> , <16 x i32> @@ -1941,341 +1009,58 @@ } define void @load_1byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r8, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r9, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi -; X64-NO-BMI2-NO-SHLD-NEXT: movb %dil, (%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X64: # %bb.0: +; X64-NEXT: movdqu (%rdi), %xmm0 +; X64-NEXT: shll $3, %esi +; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: shrb $3, %sil +; X64-NEXT: movzbl %sil, %eax +; X64-NEXT: movzbl -64(%rsp,%rax), 
%eax +; X64-NEXT: movb %al, (%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rsi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rsi -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsl %esi, %edi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb %al, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %r8d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %r9, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rdi, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsl %eax, %edi -; 
X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb %cl, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rsi, %rsi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rsi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsl %esi, %edi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb %al, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $8, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = 
xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: negb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $8, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, (%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, (%esp), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movb %dl, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $4, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $8, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebx -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, (%esp), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %cl, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $8, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X32: # %bb.0: +; X32-NEXT: subl $64, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movdqu (%edx), %xmm0 +; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: 
movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movzbl (%esp,%ecx), %ecx +; X32-NEXT: movb %cl, (%eax) +; X32-NEXT: addl $64, %esp +; X32-NEXT: retl %init = load <16 x i8>, ptr %src, align 1 %intermediate.sroa.0.0.vec.expand = shufflevector <16 x i8> %init, <16 x i8> poison, <32 x i32> %intermediate.sroa.0.0.vecblend = shufflevector <32 x i8> %intermediate.sroa.0.0.vec.expand, <32 x i8> , <32 x i32> @@ -2291,337 +1076,58 @@ } define void @load_2byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r8, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r9, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi -; X64-NO-BMI2-NO-SHLD-NEXT: movw %di, (%rdx) -; 
X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: load_2byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X64: # %bb.0: +; X64-NEXT: movdqu (%rdi), %xmm0 +; X64-NEXT: shll $3, %esi +; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: shrb $3, %sil +; X64-NEXT: movzbl %sil, %eax +; X64-NEXT: movq -64(%rsp,%rax), %rax +; X64-NEXT: movw %ax, (%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rsi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rsi -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsl %esi, %edi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: movw %ax, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax -; 
X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %r8d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %r9, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rdi, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsl %eax, %edi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movw %cx, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rsi, %rsi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rsi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsl %esi, %edi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movw %ax, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl 
%edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $8, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, (%esp) # 4-byte Folded Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: subb 
$64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: negb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movw %si, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $8, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: 
addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movw %si, (%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, (%esp), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movw %dx, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $4, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $8, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, (%esp), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movw %cx, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $8, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_2byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X32: # %bb.0: +; X32-NEXT: subl $64, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movdqu (%edx), %xmm0 
+; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl (%esp,%ecx), %ecx +; X32-NEXT: movw %cx, (%eax) +; X32-NEXT: addl $64, %esp +; X32-NEXT: retl %init = load <16 x i8>, ptr %src, align 1 %intermediate.sroa.0.0.vec.expand = shufflevector <16 x i8> %init, <16 x i8> poison, <32 x i32> %intermediate.sroa.0.0.vecblend = shufflevector <32 x i8> %intermediate.sroa.0.0.vec.expand, <32 x i8> , <32 x i32> @@ -2636,337 +1142,58 @@ } define void @load_4byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r9 
-; X64-NO-BMI2-NO-SHLD-NEXT: orq %r8, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r9, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi -; X64-NO-BMI2-NO-SHLD-NEXT: movl %edi, (%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: load_4byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X64: # %bb.0: +; X64-NEXT: movdqu (%rdi), %xmm0 +; X64-NEXT: shll $3, %esi +; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: shrb $3, %sil +; X64-NEXT: movzbl %sil, %eax +; X64-NEXT: movl -64(%rsp,%rax), %eax +; X64-NEXT: movl %eax, (%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rsi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rsi -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsl %esi, %edi -; 
X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %r8d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %r9, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rdi, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsl %eax, %edi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rsi, %rsi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rsi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; 
X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsl %esi, %edi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $8, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, (%esp) # 4-byte Folded Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: 
notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: negb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $8, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, (%esp), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $4, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $8, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = 
xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, (%esp), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $8, %esp -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_4byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X32: # %bb.0: +; X32-NEXT: subl $64, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movdqu (%edx), %xmm0 +; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl (%esp,%ecx), %ecx +; X32-NEXT: movl %ecx, (%eax) +; X32-NEXT: addl $64, %esp +; X32-NEXT: retl %init = load <16 x i8>, ptr %src, align 1 %intermediate.sroa.0.0.vec.expand = shufflevector <16 x i8> %init, <16 x i8> poison, <32 x i32> %intermediate.sroa.0.0.vecblend = shufflevector <32 x i8> %intermediate.sroa.0.0.vec.expand, <32 x i8> , <32 x i32> @@ -2981,471 +1208,60 @@ } define void @load_8byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu 
(%rdi), %xmm0 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r8, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r9, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %rax, %rcx -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %rcx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rcx, (%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: load_8byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X64: # %bb.0: +; X64-NEXT: movdqu (%rdi), %xmm0 +; X64-NEXT: shll $3, %esi +; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: shrb $3, %sil +; X64-NEXT: movzbl %sil, %eax +; X64-NEXT: movq -64(%rsp,%rax), %rax +; X64-NEXT: movq %rax, (%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd 
{{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rsi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rsi -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %rsi, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %r8d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %r9, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rdi, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %rax, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rcx, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx -; 
X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rsi, %rsi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rsi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %rsi, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $24, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, (%esp) # 4-byte Folded Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edx -; 
X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: negb %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), 
%edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: addl %edx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: addl %edi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb %cl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, (%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, 4(%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $24, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: 
load_8byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $24, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ch -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $24, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $32, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ecx, %ebp -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: negb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebx,%ebx), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 4(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $32, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx 
-; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $24, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 
%edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, (%esp) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 4(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $24, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_8byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X32: # %bb.0: +; X32-NEXT: subl $64, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movdqu (%edx), %xmm0 +; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, 
{{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl (%esp,%ecx), %edx +; X32-NEXT: movl 4(%esp,%ecx), %ecx +; X32-NEXT: movl %ecx, 4(%eax) +; X32-NEXT: movl %edx, (%eax) +; X32-NEXT: addl $64, %esp +; X32-NEXT: retl %init = load <16 x i8>, ptr %src, align 1 %intermediate.sroa.0.0.vec.expand = shufflevector <16 x i8> %init, <16 x i8> poison, <32 x i32> %intermediate.sroa.0.0.vecblend = shufflevector <32 x i8> %intermediate.sroa.0.0.vec.expand, <32 x i8> , <32 x i32> @@ -3460,589 +1276,70 @@ } define void @load_16byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r8, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rax, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r8, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rcx, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %rcx, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %r9 -; 
X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %rcx, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rax, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, 8(%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, (%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: load_16byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X64: # %bb.0: +; X64-NEXT: movdqu (%rdi), %xmm0 +; X64-NEXT: shll $3, %esi +; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: shrb $3, %sil +; X64-NEXT: movzbl %sil, %eax +; X64-NEXT: movq -64(%rsp,%rax), %rcx +; X64-NEXT: movq -56(%rsp,%rax), %rax +; X64-NEXT: movq %rax, 8(%rdx) +; X64-NEXT: movq %rcx, (%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %r9d, %r9d -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r8, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r9, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %r9, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %r9, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: 
cmoveq %rsi, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, 8(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %r8d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %r9, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rdi, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %r9d, %r9d -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rdi, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r9, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %r9, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rcx, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %r9, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, 8(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r8, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, 
%rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rsi, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r9d, %r9d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r8, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r9, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %r9, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %r9, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rsi, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, 8(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $44, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: 
movl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: negb %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; 
X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 12(%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 8(%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 4(%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $44, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $32, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: 
testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: 
cmpb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ecx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 12(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 8(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, 4(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $32, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $44, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 
{{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %esi, %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 
%edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, 
{{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), 
%esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 12(%edx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 8(%edx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%edx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, 4(%edx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $44, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $36, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %eax -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 12(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, 8(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, 4(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $36, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_16byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X32: # %bb.0: +; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi +; X32-NEXT: subl $64, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movdqu (%edx), %xmm0 +; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: 
movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl (%esp,%ecx), %edx +; X32-NEXT: movl 4(%esp,%ecx), %esi +; X32-NEXT: movl 8(%esp,%ecx), %edi +; X32-NEXT: movl 12(%esp,%ecx), %ecx +; X32-NEXT: movl %ecx, 12(%eax) +; X32-NEXT: movl %edi, 8(%eax) +; X32-NEXT: movl %esi, 4(%eax) +; X32-NEXT: movl %edx, (%eax) +; X32-NEXT: addl $64, %esp +; X32-NEXT: popl %esi +; X32-NEXT: popl %edi +; X32-NEXT: retl %init = load <16 x i8>, ptr %src, align 1 %intermediate.sroa.0.0.vec.expand = shufflevector <16 x i8> %init, <16 x i8> poison, <32 x i32> %intermediate.sroa.0.0.vecblend = shufflevector <32 x i8> %intermediate.sroa.0.0.vec.expand, <32 x i8> , <32 x i32> @@ -4057,679 +1354,90 @@ } define void @load_1byte_chunk_of_64byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r11, %rax -; 
X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %r10d, %r10d -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebx -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rax, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r14, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %r9d -; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %eax -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r11, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: cmovbl %r9d, %edi -; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: cmovel %r8d, %edi -; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovael %r10d, %edi -; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: cmovel %r8d, %edi -; X64-NO-BMI2-NO-SHLD-NEXT: movb %dil, (%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: load_1byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64: # %bb.0: +; X64-NEXT: movdqu (%rdi), %xmm0 +; X64-NEXT: movdqu 16(%rdi), %xmm1 +; X64-NEXT: pshufd {{.*#+}} xmm2 = 
xmm0[2,3,2,3] +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] +; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm3, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm2, -{{[0-9]+}}(%rsp) +; X64-NEXT: andl $536870911, %esi # imm = 0x1FFFFFFF +; X64-NEXT: cmpl $64, %esi +; X64-NEXT: movl $64, %eax +; X64-NEXT: cmovbl %esi, %eax +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movzbl -128(%rsp,%rax), %eax +; X64-NEXT: movb %al, (%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %r9d, %r9d -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r11, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: 
movq %rdi, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r11, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orl %ebx, %r8d -; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rax, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %r8d, %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovael %r9d, %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb %al, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %sil, %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r10, %r8, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, 
%r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %r10d -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r11, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ebx -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbx, %r14, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r11, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %r9, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orl %r10d, %r9d -; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %r10d -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r10, %r8, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r10d, %r11d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r11b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rcx,%rcx), %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r11, %rbx, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r8, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r10, %rcx, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r11, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %r9d, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb %cl, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: 
movq %xmm2, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r9, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rsi, %r9, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %sil, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %r9, %rdi, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r11d, %r11d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %r9d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %r9d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %r8, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %r9d, %r8d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %r8d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %r11d, %r8d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %r8d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb %r8b, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi 
-; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $12, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(,%ecx,8), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: subb %dl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; 
X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: testl %edx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: subb %al, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = 
xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(%ecx), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: negl %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ebx -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $12, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $8, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: leal -128(,%ecx,8), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %dl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %eax -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %al, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: 
pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%esi), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: negl %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $8, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl 
%edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $12, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%ecx), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(,%eax,8), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %cl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb 
$-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ebp), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, (%esp), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %dl, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%esi), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: negl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael (%esp), %edx # 4-byte Folded 
Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %dl, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $12, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm2 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm2[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(,%ebx,8), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %ecx -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %dl, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %ebp, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %edx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %bl, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%ebx), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negl %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, (%esp), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %ebx # imm = 0x100 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %cl, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $4, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: 
load_1byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32: # %bb.0: +; X32-NEXT: subl $128, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movdqu (%ecx), %xmm0 +; X32-NEXT: movdqu 16(%ecx), %xmm1 +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm6 = xmm1[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm7 = xmm1[2,3,2,3] +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm7, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm6, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm5, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm4, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movl $536870911, %edx # imm = 0x1FFFFFFF +; X32-NEXT: andl {{[0-9]+}}(%esp), %edx +; X32-NEXT: cmpl $64, %edx +; X32-NEXT: movl $64, %ecx +; X32-NEXT: cmovbl %edx, %ecx +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) 
+; X32-NEXT: movzbl (%esp,%ecx), %ecx +; X32-NEXT: movb %cl, (%eax) +; X32-NEXT: addl $128, %esp +; X32-NEXT: retl %init = load <32 x i8>, ptr %src, align 1 %intermediate.sroa.0.0.vec.expand = shufflevector <32 x i8> %init, <32 x i8> poison, <64 x i32> %intermediate.sroa.0.0.vecblend = shufflevector <64 x i8> %intermediate.sroa.0.0.vec.expand, <64 x i8> , <64 x i32> @@ -4745,674 +1453,90 @@ } define void @load_2byte_chunk_of_64byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r11, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %r10d, %r10d -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebx -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rax, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: 
cmoveq %r14, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %r9d -; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %eax -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r11, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: cmovbl %r9d, %edi -; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: cmovel %r8d, %edi -; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovael %r10d, %edi -; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: cmovel %r8d, %edi -; X64-NO-BMI2-NO-SHLD-NEXT: movw %di, (%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: load_2byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64: # %bb.0: +; X64-NEXT: movdqu (%rdi), %xmm0 +; X64-NEXT: movdqu 16(%rdi), %xmm1 +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] +; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm3, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm2, -{{[0-9]+}}(%rsp) +; X64-NEXT: andl $536870911, %esi # imm = 0x1FFFFFFF +; X64-NEXT: cmpl $64, %esi +; X64-NEXT: movl $64, %eax +; X64-NEXT: cmovbl %esi, %eax +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; 
X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq -128(%rsp,%rax), %rax +; X64-NEXT: movw %ax, (%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %r9d, %r9d -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r11, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r11, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orl %ebx, %r8d -; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rax, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %r8d, %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; 
X64-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovael %r9d, %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: movw %ax, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %sil, %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r10, %r8, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %r10d -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r11, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ebx -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbx, %r14, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r11, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %r9, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orl %r10d, %r9d -; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %r10d -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r10, %r8, %r8 
-; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r10d, %r11d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r11b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rcx,%rcx), %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r11, %rbx, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r8, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r10, %rcx, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r11, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %r9d, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movw %cx, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r9, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rsi, %r9, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %sil, %r9b -; 
X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %r9, %rdi, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r11d, %r11d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %r9d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %r9d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %r8, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %r9d, %r8d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %r8d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %r11d, %r8d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %r8d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movw %r8w, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $16, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(,%ecx,8), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %cl -; 
X32-NO-BMI2-NO-SHLD-NEXT: subb %dl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: testl %edx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %esi -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: subb %al, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %eax -; 
X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testl %ebx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(%ebx), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: negl %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael (%esp), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movw %bx, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $16, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: 
subl $8, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: leal -128(,%ecx,8), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %dl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: 
movb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%edi), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: negl %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %edi # imm = 0x100 -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movw %si, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $8, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $12, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%ecx), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(,%eax,8), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %cl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ebp), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, (%esp), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %dl, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, 
%esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%esi), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: negl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael (%esp), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movw %dx, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $12, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %eax -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm2 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm2[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(,%ebx,8), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %dl, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %ebp, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: 
movd %xmm2, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %edx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %bl, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%ebx), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negl %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, (%esp), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %ebx # imm = 0x100 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movw %cx, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $4, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_2byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32: # %bb.0: +; X32-NEXT: subl $128, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movdqu (%ecx), %xmm0 +; X32-NEXT: movdqu 16(%ecx), %xmm1 +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm6 = xmm1[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm7 = xmm1[2,3,2,3] +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm7, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm6, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm5, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm4, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movl $536870911, %edx # imm = 0x1FFFFFFF +; X32-NEXT: andl {{[0-9]+}}(%esp), %edx +; X32-NEXT: cmpl $64, %edx +; X32-NEXT: movl $64, %ecx +; X32-NEXT: cmovbl 
%edx, %ecx +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl (%esp,%ecx), %ecx +; X32-NEXT: movw %cx, (%eax) +; X32-NEXT: addl $128, %esp +; X32-NEXT: retl %init = load <32 x i8>, ptr %src, align 1 %intermediate.sroa.0.0.vec.expand = shufflevector <32 x i8> %init, <32 x i8> poison, <64 x i32> %intermediate.sroa.0.0.vecblend = shufflevector <64 x i8> %intermediate.sroa.0.0.vec.expand, <64 x i8> , <64 x i32> @@ -5427,671 +1551,90 @@ } define void @load_4byte_chunk_of_64byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %r9 -; 
X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r11, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %r10d, %r10d -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebx -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rax, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r14, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %r9d -; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %eax -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r11, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: cmovbl %r9d, %edi -; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: cmovel %r8d, %edi -; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovael %r10d, %edi -; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: cmovel %r8d, %edi -; X64-NO-BMI2-NO-SHLD-NEXT: movl %edi, (%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx -; 
X64-NO-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: load_4byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64: # %bb.0: +; X64-NEXT: movdqu (%rdi), %xmm0 +; X64-NEXT: movdqu 16(%rdi), %xmm1 +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] +; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm3, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm2, -{{[0-9]+}}(%rsp) +; X64-NEXT: andl $536870911, %esi # imm = 0x1FFFFFFF +; X64-NEXT: cmpl $64, %esi +; X64-NEXT: movl $64, %eax +; X64-NEXT: cmovbl %esi, %eax +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movl -128(%rsp,%rax), %eax +; X64-NEXT: movl %eax, (%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %r11 -; 
X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %r9d, %r9d -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r11, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r11, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orl %ebx, %r8d -; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rax, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %r8d, %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovael %r9d, %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %r8 -; 
X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %sil, %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r10, %r8, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %r10d -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r11, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ebx -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbx, %r14, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r11, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %r9, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orl %r10d, %r9d -; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %r10d -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r10, %r8, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r10d, %r11d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r11b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rcx,%rcx), %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r11, %rbx, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r8, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r10, %rcx, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r11, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %r9d, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: 
load_4byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r9, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rsi, %r9, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %sil, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %r9, %rdi, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r11d, %r11d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %r9d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %r9d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %r8, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %r9d, %r8d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %r8d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %r11d, %r8d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %r8d -; 
X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %r8d, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $12, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(,%ecx,8), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: subb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: 
movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: testl %ebx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: subb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; 
X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(%ecx), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: negl %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl 
$256, %esi # imm = 0x100 -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $12, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $8, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: leal -128(,%ecx,8), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %dl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd 
%xmm0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%edi), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: negl %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %edi # imm = 0x100 -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $8, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: 
load_4byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $12, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%ecx), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(,%eax,8), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %cl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ebp), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, (%esp), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %dl, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%esi), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: negl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael (%esp), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %esi 
# imm = 0x100 -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $12, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm2 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm2[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(,%ebx,8), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %dl, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %ebp, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %edx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %bl, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%ebx), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negl %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, (%esp), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %ebx # imm = 0x100 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $4, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_4byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32: # %bb.0: +; X32-NEXT: subl $128, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl 
{{[0-9]+}}(%esp), %ecx +; X32-NEXT: movdqu (%ecx), %xmm0 +; X32-NEXT: movdqu 16(%ecx), %xmm1 +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm6 = xmm1[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm7 = xmm1[2,3,2,3] +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm7, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm6, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm5, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm4, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movl $536870911, %edx # imm = 0x1FFFFFFF +; X32-NEXT: andl {{[0-9]+}}(%esp), %edx +; X32-NEXT: cmpl $64, %edx +; X32-NEXT: movl $64, %ecx +; X32-NEXT: cmovbl %edx, %ecx +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl (%esp,%ecx), %ecx +; X32-NEXT: movl %ecx, (%eax) +; X32-NEXT: addl $128, %esp +; X32-NEXT: retl %init = load <32 x i8>, ptr %src, align 1 
%intermediate.sroa.0.0.vec.expand = shufflevector <32 x i8> %init, <32 x i8> poison, <64 x i32> %intermediate.sroa.0.0.vecblend = shufflevector <64 x i8> %intermediate.sroa.0.0.vec.expand, <64 x i8> , <64 x i32> @@ -6106,1030 +1649,92 @@ } define void @load_8byte_chunk_of_64byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r11, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %r10d, %r10d -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r10, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbx, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r14, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rax, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %eax -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r11 
-; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r11, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: cmovbq %r9, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovaeq %r10, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, (%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: load_8byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64: # %bb.0: +; X64-NEXT: movdqu (%rdi), %xmm0 +; X64-NEXT: movdqu 16(%rdi), %xmm1 +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] +; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm3, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm2, -{{[0-9]+}}(%rsp) +; X64-NEXT: andl $536870911, %esi # imm = 0x1FFFFFFF +; X64-NEXT: cmpl $64, %esi +; X64-NEXT: movl $64, %eax +; X64-NEXT: cmovbl %esi, %eax +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq -128(%rsp,%rax), %rax +; 
X64-NEXT: movq %rax, (%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %r10d, %r10d -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r10, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rbx, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r11, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rax, %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovbq %r8, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovaeq %r10, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rax -; 
X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %sil, %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r10, %r8, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rdi, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ebx -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbx, %r14, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r10, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %r9, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r11, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %r10d -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r10, %r8, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r10d, %r11d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r11b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rcx,%rcx), %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r11, %rbx, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r8, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r10, %rcx, %rcx -; 
X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r11, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovbq %r9, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovaeq %rdi, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rcx, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r9, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rsi, %r9, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %sil, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %r9, %rdi, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r11d, %r11d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r11, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %rcx, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), 
%ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %r8, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbq %r10, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovaeq %r11, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $68, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-NO-BMI2-NO-SHLD-NEXT: leal (,%edx,8), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(%ebx), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: negl %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: 
movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: subb %bl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm3, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: 
shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(,%eax,8), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: subb %al, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd 
%xmm2, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, (%esp) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testl %ebx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte 
Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: addl %edx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testl %edx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %ecx # imm = 0x100 -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 4(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $68, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $72, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm3, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: leal -128(,%ecx,8), %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: # kill: def $al killed $al killed $eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm3, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm4, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %eax, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: 
movb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %al, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %eax, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $-128, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: negl %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, 
%esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %eax, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %eax 
-; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %eax, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %eax # imm = 0x100 -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %eax, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, 4(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $72, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $80, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (,%ecx,8), %eax -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%eax), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: negl %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %dl, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(,%eax,8), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %al, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl (%esp), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: 
cmovbl %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edi, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte 
Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $al killed $al killed $eax def $eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, 
{{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %ecx # imm = 0x100 -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, 4(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $80, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $72, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(,%ecx,8), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %cl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 
4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $dl killed $dl killed $edx def $edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %esi, %edx -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %al, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%eax), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negl %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 
4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %edx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), 
%ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %edi # imm = 0x100 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, 4(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $72, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_8byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32: # %bb.0: +; X32-NEXT: subl $128, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movdqu (%ecx), %xmm0 +; X32-NEXT: movdqu 16(%ecx), %xmm1 +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm6 = xmm1[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm7 = xmm1[2,3,2,3] +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm7, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm6, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm5, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm4, 
{{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movl $536870911, %edx # imm = 0x1FFFFFFF +; X32-NEXT: andl {{[0-9]+}}(%esp), %edx +; X32-NEXT: cmpl $64, %edx +; X32-NEXT: movl $64, %ecx +; X32-NEXT: cmovbl %edx, %ecx +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl (%esp,%ecx), %edx +; X32-NEXT: movl 4(%esp,%ecx), %ecx +; X32-NEXT: movl %ecx, 4(%eax) +; X32-NEXT: movl %edx, (%eax) +; X32-NEXT: addl $128, %esp +; X32-NEXT: retl %init = load <32 x i8>, ptr %src, align 1 %intermediate.sroa.0.0.vec.expand = shufflevector <32 x i8> %init, <32 x i8> poison, <64 x i32> %intermediate.sroa.0.0.vecblend = shufflevector <64 x i8> %intermediate.sroa.0.0.vec.expand, <64 x i8> , <64 x i32> @@ -7144,1586 +1749,102 @@ } define void @load_16byte_chunk_of_64byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14 
-; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %al -; X64-NO-BMI2-NO-SHLD-NEXT: subb %sil, %al -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r10, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbx, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbx, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %ebx, %ebx -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r12, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r10, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r14, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r15, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r12, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: leal 
-128(%rsi), %eax -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r10, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r9, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: cmovbq %r14, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovbq %r11, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r8, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovaeq %rbx, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovaeq %rbx, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r8, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, 8(%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r15, (%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: load_16byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64: # %bb.0: +; X64-NEXT: movdqu (%rdi), %xmm0 +; X64-NEXT: movdqu 16(%rdi), %xmm1 +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] +; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm3, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm2, -{{[0-9]+}}(%rsp) +; X64-NEXT: andl $536870911, %esi # imm = 0x1FFFFFFF +; X64-NEXT: cmpl $64, %esi +; X64-NEXT: movl $64, %eax +; X64-NEXT: cmovbl %esi, %eax +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, 
-{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq -128(%rsp,%rax), %rcx +; X64-NEXT: movq -120(%rsp,%rax), %rax +; X64-NEXT: movq %rax, 8(%rdx) +; X64-NEXT: movq %rcx, (%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r9, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %r10d, %r10d -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r14, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r10, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %rax, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %r15 -; 
X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r15, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r10, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %rbx, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r14, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rdi, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r10, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovbq %r11, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovbq %r15, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovaeq %r10, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovaeq %r10, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, 8(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx -; 
X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %sil, %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %rdi, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r8d, %r11d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r11b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %r9, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %r14d -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r14, %r9, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrq %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r11, %r9, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r10, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %r11d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r11b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r11, %r12, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r10, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %r12d, %r12d -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r10, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r12, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r12, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r11, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r10, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r14d, %r8d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %r10, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r15, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r14, %rdi, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r14b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rdi, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r12, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: 
cmpl $128, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovbq %rbx, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovbq %r9, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rcx, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovaeq %r12, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovaeq %r12, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rcx, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r8, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, 8(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rsi, %r8, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r9d, %r9d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rbx, %r11 -; 
X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r9, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r10, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %rax, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rcx, %rax, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r15, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r9, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r11, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %rbx, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r10, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %r10, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r10, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r9, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbq %r14, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbq %r15, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovaeq %r9, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovaeq %r9, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r10, 8(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi 
-; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $112, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: leal (,%edx,8), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(%eax), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: negl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: subb %bl, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, 
{{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: testl %ebx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm3, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: notb %al -; X32-NO-BMI2-NO-SHLD-NEXT: 
movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(,%ecx,8), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; 
X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: addl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; 
X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testl %edi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte 
Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: subb %al, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), 
%eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx 
# 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testl %edi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; 
X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; 
X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %edx # imm = 0x100 -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testl %edx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 4(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, 12(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 8(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $112, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; 
X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $88, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm3, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: leal (,%esi,8), %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%ebp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: negl %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %al, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edx -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %eax, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm3, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: leal -128(,%edx,8), %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl (%esp), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %cl, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 
%edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %bl # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %ebp, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 
%ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %edx # 4-byte Folded Reload -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %ecx # imm = 0x100 -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 12(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, 8(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $88, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $124, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (,%ecx,8), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%eax), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: negl %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %dl, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edi, %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm3, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal 
(%ebx,%ebx), %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebp, %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(,%eax,8), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ecx, 
%ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, (%esp) # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded 
Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $al killed $al def $eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %al, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $al killed $al killed $eax def $eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, (%esp) # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: 
cmovnel %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 
1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 
4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %edi # imm = 0x100 -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %edi, %edi -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, 4(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 12(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 8(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $124, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $92, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm3, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal (,%ecx,8), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%ecx), %edx -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: negl %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %dl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %ebx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %edx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm3, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(,%eax,8), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel 
%ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl (%esp), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edi -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ecx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: 
movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %cl, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $bl killed $bl killed $ebx def $ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %cl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ecx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: 
orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ecx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ecx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %ecx # imm = 0x100 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ebp -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 4(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, 12(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, 8(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $92, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_16byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32: # %bb.0: +; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi +; X32-NEXT: subl $128, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movdqu (%ecx), %xmm0 +; X32-NEXT: movdqu 16(%ecx), %xmm1 +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm6 = xmm1[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm7 = xmm1[2,3,2,3] +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm7, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm6, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm5, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm4, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movl $536870911, %edx # imm = 0x1FFFFFFF +; X32-NEXT: andl {{[0-9]+}}(%esp), %edx +; X32-NEXT: cmpl $64, %edx +; X32-NEXT: movl $64, %ecx +; X32-NEXT: cmovbl %edx, %ecx +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) 
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl (%esp,%ecx), %edx +; X32-NEXT: movl 4(%esp,%ecx), %esi +; X32-NEXT: movl 8(%esp,%ecx), %edi +; X32-NEXT: movl 12(%esp,%ecx), %ecx +; X32-NEXT: movl %ecx, 12(%eax) +; X32-NEXT: movl %edi, 8(%eax) +; X32-NEXT: movl %esi, 4(%eax) +; X32-NEXT: movl %edx, (%eax) +; X32-NEXT: addl $128, %esp +; X32-NEXT: popl %esi +; X32-NEXT: popl %edi +; X32-NEXT: retl %init = load <32 x i8>, ptr %src, align 1 %intermediate.sroa.0.0.vec.expand = shufflevector <32 x i8> %init, <32 x i8> poison, <64 x i32> %intermediate.sroa.0.0.vecblend = shufflevector <64 x i8> %intermediate.sroa.0.0.vec.expand, <64 x i8> , <64 x i32> @@ -8738,2219 +1859,122 @@ } define void @load_32byte_chunk_of_64byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: load_32byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm1 -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm0 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; 
X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %r11d, %r11d -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r13, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rbx, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X64-NO-BMI2-NO-SHLD-NEXT: notb %al -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdx, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbx, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r13, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %al -; X64-NO-BMI2-NO-SHLD-NEXT: subb %sil, %al -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r13 -; 
X64-NO-BMI2-NO-SHLD-NEXT: shrq %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbx, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbp, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r12, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %eax -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r12, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r12, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: cmovbq %rbx, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovaeq %r11, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovbq %r13, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovaeq %r11, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rax, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r9, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovaeq %r11, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovaeq %r11, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovaeq %r11, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovaeq %r11, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rax, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r9, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rdx, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq 
%rdi, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r10, 24(%r8) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r15, 16(%r8) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r12, 8(%r8) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, (%r8) -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: load_32byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64: # %bb.0: +; X64-NEXT: movdqu (%rdi), %xmm0 +; X64-NEXT: movdqu 16(%rdi), %xmm1 +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] +; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm3, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm2, -{{[0-9]+}}(%rsp) +; X64-NEXT: andl $536870911, %esi # imm = 0x1FFFFFFF +; X64-NEXT: cmpl $64, %esi +; X64-NEXT: movl $64, %eax +; X64-NEXT: cmovbl %esi, %eax +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq -128(%rsp,%rax), %rcx +; X64-NEXT: movq -120(%rsp,%rax), %rsi +; X64-NEXT: movq -112(%rsp,%rax), %rdi +; X64-NEXT: movq -104(%rsp,%rax), %rax +; X64-NEXT: movq %rax, 24(%rdx) +; X64-NEXT: movq %rdi, 16(%rdx) +; X64-NEXT: movq %rsi, 8(%rdx) +; X64-NEXT: movq %rcx, (%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: load_32byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r15 -; 
X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r9, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r15, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r12, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rbx, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rbx, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %rax, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r13, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rbx, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r14, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r15, %r12 -; 
X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r15, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rbx, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovbq %r12, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovbq %r13, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovaeq %rbx, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovaeq %rbx, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovaeq %rbx, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovaeq %rbx, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovaeq %rbx, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovaeq %rbx, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, 24(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r11, 16(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r15, 8(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r14, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: load_32byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14 -; 
X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %r9d, %r9d -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r15, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r9, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rdi, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r11, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r9, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %r13d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %r14, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r10, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %r10, %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %r14, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbp, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r15, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %sil, %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r13d, %r15d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r15b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r10, %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrq %rbp -; 
X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r15, %rbp, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %rax, %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbp, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %r10, %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r13, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r9, %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r12, %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbx, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %r12d -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r12d, %ebx -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbx, %r14, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r12, %r10, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r14, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r12b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r12, %rax, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r14, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r9, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovbq %r13, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovbq %r15, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovaeq %r9, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovaeq %r9, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rcx, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovaeq %r9, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovaeq %r9, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovaeq %r9, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovaeq %r9, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rcx, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r10, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r8, 24(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r11, 16(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rbx, (%rdx) -; 
X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r14, 8(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_32byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r9, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rsi, %r9, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r11d, %r11d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r15, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rsi, %rdi, %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r12, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r11, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r11, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; 
X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %rax, %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rcx, %rax, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r13, %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r11, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r14, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r15, %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rdi, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r15, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r11, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbq %r12, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbq %r13, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovaeq %r11, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovaeq %r11, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovaeq %r11, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovaeq %r11, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovaeq %r11, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovaeq %r11, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r10, 24(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rbx, 16(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r15, 8(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r14, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %rbx 
-; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: load_32byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $168, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm1 -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%ecx), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm3, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm1[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm3, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm3, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: leal (,%eax,8), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm3, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, (%esp) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl 
%cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb (%esp), %ch # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: addl %edx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; 
X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(,%eax,8), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx -; 
X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(%edx), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: negl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ebx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: subb %al, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: notb 
%dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; 
X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; 
X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testl %edx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; 
X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testl %edi, %edi -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testl %edi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testl %edi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; 
X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %ebp # imm = 0x100 -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload 
-; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 28(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, 24(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 20(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 16(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 4(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, 12(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 8(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $168, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_32byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $140, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), 
%xmm1 -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: leal (,%ecx,8), %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %eax, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%ebx), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: negl %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: leal -128(,%eax,8), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, 
%dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 
4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax 
# 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), 
%eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bl, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %eax, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: 
testl %edi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload 
-; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 28(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 24(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 20(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 16(%eax) -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, 4(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, 12(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, 8(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $140, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_32byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $192, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm1 -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%ecx), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm4, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm4, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm4 = xmm1[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (,%edx,8), %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm4, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm3, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = 
xmm1[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(,%eax,8), %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte 
Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%edi), %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: negl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edi, %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %esi, %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %bl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edi, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $bl killed $bl killed $ebx def $ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edi, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: 
movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, (%esp), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael (%esp), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %dh -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %bl, %dh -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %dh, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %dh, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dh -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %bl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dh -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded 
Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %ebx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %ebx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 
4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %ecx # imm = 0x100 -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte 
Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 28(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 24(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, 16(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 20(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 4(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 12(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, 8(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $192, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_32byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $140, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm1[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: 
pshufd {{.*#+}} xmm4 = xmm1[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm4, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm4, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm4 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal (,%ecx,8), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm4, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm3, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %ebx -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %cl, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel (%esp), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $-128, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negl %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $dl killed $dl killed $edx def $edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(,%eax,8), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, 
%eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl 
{{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: 
movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ecx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ecx -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %dl, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ecx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %edx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ecx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %eax # imm = 0x100 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael 
%esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, 28(%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, 24(%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, 20(%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, 16(%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 
4(%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, 12(%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, 8(%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $140, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_32byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32: # %bb.0: +; X32-NEXT: pushl %ebp +; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi +; X32-NEXT: subl $136, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movdqu (%eax), %xmm0 +; X32-NEXT: movdqu 16(%eax), %xmm1 +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm6 = xmm1[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm7 = xmm1[2,3,2,3] +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm0, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm7, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm6, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm5, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm4, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movl $536870911, %ecx # imm = 0x1FFFFFFF +; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: cmpl $64, %ecx +; X32-NEXT: movl $64, %eax +; X32-NEXT: cmovbl %ecx, %eax +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; 
X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl 8(%esp,%eax), %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl 12(%esp,%eax), %ecx +; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NEXT: movl 16(%esp,%eax), %esi +; X32-NEXT: movl 20(%esp,%eax), %edi +; X32-NEXT: movl 24(%esp,%eax), %ebx +; X32-NEXT: movl 28(%esp,%eax), %ebp +; X32-NEXT: movl 32(%esp,%eax), %edx +; X32-NEXT: movl 36(%esp,%eax), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %ecx, 28(%eax) +; X32-NEXT: movl %edx, 24(%eax) +; X32-NEXT: movl %ebp, 20(%eax) +; X32-NEXT: movl %ebx, 16(%eax) +; X32-NEXT: movl %edi, 12(%eax) +; X32-NEXT: movl %esi, 8(%eax) +; X32-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, 4(%eax) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, (%eax) +; X32-NEXT: addl $136, %esp +; X32-NEXT: popl %esi +; X32-NEXT: popl %edi +; X32-NEXT: popl %ebx +; X32-NEXT: popl %ebp +; X32-NEXT: retl %init = load <32 x i8>, ptr %src, align 1 %intermediate.sroa.0.0.vec.expand = shufflevector <32 x i8> %init, <32 x i8> poison, <64 x i32> %intermediate.sroa.0.0.vecblend = shufflevector <64 x i8> %intermediate.sroa.0.0.vec.expand, <64 x i8> , <64 x i32> @@ -10965,9 +1989,7 @@ } ;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: ; ALL: {{.*}} -; X32: {{.*}} ; X32-NO-SHLD: {{.*}} ; X32-SHLD: {{.*}} -; X64: {{.*}} ; X64-NO-SHLD: {{.*}} ; X64-SHLD: {{.*}} diff --git a/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll b/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll --- a/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll +++ b/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll @@ -603,239 +603,35 @@ ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb %sil, (%rdx) ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq ; -; X32-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: negb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $4, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $4, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, (%esp), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %cl, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $4, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %ebp, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %bl, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_1byte_chunk_of_16byte_alloca: +; X32: # %bb.0: +; X32-NEXT: subl $32, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movdqu (%edx), %xmm0 +; X32-NEXT: shll $3, %ecx +; 
X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: movzbl %cl, %edx +; X32-NEXT: cmpb $16, %cl +; X32-NEXT: movl $16, %ecx +; X32-NEXT: cmovbl %edx, %ecx +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movzbl (%esp,%ecx), %ecx +; X32-NEXT: movb %cl, (%eax) +; X32-NEXT: addl $32, %esp +; X32-NEXT: retl %init = load <16 x i8>, ptr %src, align 1 %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 %intermediate.val.frozen = freeze <16 x i8> %init @@ -918,240 +714,35 @@ ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movw %si, (%rdx) ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq ; -; X32-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: negb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movw %cx, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $4, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: 
retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: 
cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movw %bp, (%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, (%esp), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movw %bp, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $4, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movw %di, 
(%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $4, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_2byte_chunk_of_16byte_alloca: +; X32: # %bb.0: +; X32-NEXT: subl $32, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movdqu (%edx), %xmm0 +; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: movzbl %cl, %edx +; X32-NEXT: cmpb $16, %cl +; X32-NEXT: movl $16, %ecx +; X32-NEXT: cmovbl %edx, %ecx +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl (%esp,%ecx), %ecx +; X32-NEXT: movw %cx, (%eax) +; X32-NEXT: addl $32, %esp +; X32-NEXT: retl %init = load <16 x i8>, ptr %src, align 1 %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 %intermediate.val.frozen = freeze <16 x i8> %init @@ -1233,240 +824,35 @@ ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%rdx) ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq ; -; X32-NO-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%ecx), 
%xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: negb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %ecx -; 
X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $4, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, (%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, (%esp), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $4, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %dl 
-; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $4, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_4byte_chunk_of_16byte_alloca: +; X32: # %bb.0: +; X32-NEXT: subl $32, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movdqu (%edx), %xmm0 +; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: movzbl %cl, %edx +; X32-NEXT: cmpb $16, %cl +; X32-NEXT: movl $16, %ecx +; X32-NEXT: cmovbl %edx, %ecx +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl (%esp,%ecx), %ecx +; X32-NEXT: movl %ecx, (%eax) +; X32-NEXT: addl $32, %esp +; X32-NEXT: retl %init = load <16 x i8>, ptr %src, align 1 %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 %intermediate.val.frozen = freeze <16 x 
i8> %init @@ -1548,355 +934,37 @@ ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, (%rdx) ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq ; -; X32-NO-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $24, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, (%esp) # 4-byte Folded Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: negb %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, 
%eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: addl %edi, %edi -; 
X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel (%esp), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 4(%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $24, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $24, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = 
xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $24, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $28, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %edx -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, 4(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $28, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $28, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 
%ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, (%esp), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, 4(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $28, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_8byte_chunk_of_16byte_alloca: +; X32: # %bb.0: +; X32-NEXT: subl $32, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movdqu (%edx), %xmm0 +; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: movzbl %cl, %edx +; X32-NEXT: cmpb $16, %cl +; X32-NEXT: movl $16, %ecx +; X32-NEXT: cmovbl %edx, %ecx +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl (%esp,%ecx), %edx +; X32-NEXT: movl 4(%esp,%ecx), %ecx +; X32-NEXT: movl %ecx, 4(%eax) +; X32-NEXT: movl %edx, (%eax) +; X32-NEXT: addl $32, %esp +; X32-NEXT: retl %init = load <16 x i8>, ptr %src, align 1 %byteOff.numbits 
= shl nuw nsw i64 %byteOff, 3 %intermediate.val.frozen = freeze <16 x i8> %init @@ -1911,636 +979,64 @@ ; no @load_16byte_chunk_of_16byte_alloca define void @load_1byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r10, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %r11d, %r11d -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rax, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rax, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: orl %r11d, %r9d -; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %eax -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r10, 
%r11 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r11, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsl %r9d, %edi -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %r8d -; X64-NO-BMI2-NO-SHLD-NEXT: movb %r8b, (%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: load_1byte_chunk_of_32byte_alloca: +; X64: # %bb.0: +; X64-NEXT: movdqu (%rdi), %xmm0 +; X64-NEXT: movdqu 16(%rdi), %xmm1 +; X64-NEXT: shll $3, %esi +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] +; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm3, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm2, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: shrb $3, %sil +; X64-NEXT: movzbl %sil, %eax +; X64-NEXT: movzbl -64(%rsp,%rax), %eax +; X64-NEXT: movb %al, (%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %r11d, 
%r11d -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orl %r11d, %r8d -; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %sil, %sil -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsl %r8d, %edi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb %al, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %sil, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r9, %rdi, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %r11d, %r11d -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r10, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %r10d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r10b -; 
X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r8,%r8), %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r10, %rbx, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r9, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %r8, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r10, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orl %r11d, %r8d -; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %r9d -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %rdi, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r9d, %r10d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rcx,%rcx), %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r10, %r11, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rdi, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %rcx, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r10, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsl %r8d, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb %al, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r9, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rsi, %r9, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; 
X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %sil, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %r9, %rdi, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r11d, %r11d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %r11d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %r8, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %sil, %sil -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsl %r11d, %r8d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %r8d, %eax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb %al, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $16, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm1 -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: negb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %edx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael (%esp), %esi # 
4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: subb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-128, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, (%esp) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: addb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; 
X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb %cl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-128, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: subb %dl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $16, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $8, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm1 -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %dl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-128, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %dl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebx -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $8, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $16, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm1 -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%ecx), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, (%esp), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %dl, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebx -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-128, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $dl killed $dl killed $edx def $edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebx,%ebx), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, (%esp), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %al, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %al, (%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $16, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $12, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm1 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %ebp -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, (%esp), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, 
%ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-128, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %bl, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %ecx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edi, %ecx -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %cl, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $12, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_1byte_chunk_of_32byte_alloca: +; X32: # %bb.0: +; X32-NEXT: subl $64, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movdqu (%edx), %xmm0 +; X32-NEXT: movdqu 16(%edx), %xmm1 +; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm6 = xmm1[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm7 = xmm1[2,3,2,3] +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm7, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm6, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm5, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm4, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movzbl (%esp,%ecx), %ecx +; X32-NEXT: movb %cl, (%eax) +; X32-NEXT: addl $64, %esp +; X32-NEXT: retl %init = load <32 x i8>, ptr %src, align 1 %byteOff.numbits = 
shl nuw nsw i64 %byteOff, 3 %intermediate.val.frozen = freeze <32 x i8> %init @@ -2554,635 +1050,64 @@ } define void @load_2byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r10, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %r11d, %r11d -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rax, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rax, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: orl %r11d, %r9d -; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %eax -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r10, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: movl 
%eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r11, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsl %r9d, %edi -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %r8d -; X64-NO-BMI2-NO-SHLD-NEXT: movw %r8w, (%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: load_2byte_chunk_of_32byte_alloca: +; X64: # %bb.0: +; X64-NEXT: movdqu (%rdi), %xmm0 +; X64-NEXT: movdqu 16(%rdi), %xmm1 +; X64-NEXT: shll $3, %esi +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] +; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm3, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm2, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: shrb $3, %sil +; X64-NEXT: movzbl %sil, %eax +; X64-NEXT: movq -64(%rsp,%rax), %rax +; X64-NEXT: movw %ax, (%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %r11d, %r11d -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb 
$64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orl %r11d, %r8d -; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %sil, %sil -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsl %r8d, %edi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: movw %ax, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %sil, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r9, %rdi, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %r11d, %r11d -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r10, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %r10d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r8,%r8), %rbx -; 
X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r10, %rbx, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r9, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %r8, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r10, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orl %r11d, %r8d -; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %r9d -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %rdi, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r9d, %r10d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rcx,%rcx), %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r10, %r11, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rdi, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %rcx, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r10, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsl %r8d, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movw %ax, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r9, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rsi, %r9, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rcx -; 
X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %sil, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %r9, %rdi, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r11d, %r11d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %r11d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %r8, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %sil, %sil -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsl %r11d, %r8d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %r8d, %eax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movw %ax, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $16, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm1 -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; 
X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: negb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: subb %dl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-128, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: addb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edx, %eax -; 
X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-128, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: subb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movw %bp, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $16, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $8, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm1 -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 
-; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, 
%eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-128, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movw %bp, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $8, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $16, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm1 -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%ecx), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, 
%ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, (%esp), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %dl, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-128, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %ebp -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $dl killed $dl killed $edx def $edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebx,%ebx), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, (%esp), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %al, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %eax -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movw %ax, (%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $16, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $12, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm1 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, (%esp), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-128, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: 
movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %bl, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %ecx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movw %cx, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $12, %esp -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_2byte_chunk_of_32byte_alloca: +; X32: # %bb.0: +; X32-NEXT: subl $64, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movdqu (%edx), %xmm0 +; X32-NEXT: movdqu 16(%edx), %xmm1 +; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm6 = xmm1[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm7 = xmm1[2,3,2,3] +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm7, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm6, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm5, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm4, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl (%esp,%ecx), %ecx +; X32-NEXT: movw %cx, (%eax) +; X32-NEXT: addl $64, %esp +; X32-NEXT: retl %init = load <32 x i8>, ptr %src, align 1 %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 %intermediate.val.frozen = freeze <32 x i8> %init @@ -3195,635 +1120,64 @@ } define void @load_4byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; 
X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r10, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %r11d, %r11d -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rax, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rax, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: orl %r11d, %r9d -; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %eax -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r10, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r11, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsl %r9d, %edi -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %r8d -; 
X64-NO-BMI2-NO-SHLD-NEXT: movl %r8d, (%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: load_4byte_chunk_of_32byte_alloca: +; X64: # %bb.0: +; X64-NEXT: movdqu (%rdi), %xmm0 +; X64-NEXT: movdqu 16(%rdi), %xmm1 +; X64-NEXT: shll $3, %esi +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] +; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm3, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm2, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: shrb $3, %sil +; X64-NEXT: movzbl %sil, %eax +; X64-NEXT: movl -64(%rsp,%rax), %eax +; X64-NEXT: movl %eax, (%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %r11d, %r11d -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb 
$64, %sil -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orl %r11d, %r8d -; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %sil, %sil -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsl %r8d, %edi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %sil, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r9, %rdi, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %r11d, %r11d -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r10, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %r10d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r8,%r8), %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r10, %rbx, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r9, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %r8, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r10, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orl %r11d, %r8d -; 
X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %r9d -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %rdi, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r9d, %r10d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rcx,%rcx), %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r10, %r11, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rdi, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %rcx, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r10, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsl %r8d, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r9, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rsi, %r9, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %sil, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %r9, %rdi, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r11d, %r11d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %r11 -; 
X64-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %r11d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %r8, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %sil, %sil -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsl %r11d, %r8d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %r8d, %eax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $16, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm1 -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: negb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: subb %dl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: 
testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-128, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: addb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: 
movl (%esp), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-128, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: subb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $16, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $8, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm1 -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-128, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $8, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; 
X32-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $16, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm1 -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%ecx), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, 
%edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, (%esp), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %dl, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-128, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $dl killed $dl killed $edx def $edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebx,%ebx), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, (%esp), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %al, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $16, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $12, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm1 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, (%esp), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-128, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: 
orl %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %bl, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %ecx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $12, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_4byte_chunk_of_32byte_alloca: +; X32: # 
%bb.0: +; X32-NEXT: subl $64, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movdqu (%edx), %xmm0 +; X32-NEXT: movdqu 16(%edx), %xmm1 +; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm6 = xmm1[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm7 = xmm1[2,3,2,3] +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm7, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm6, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm5, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm4, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl (%esp,%ecx), %ecx +; X32-NEXT: movl %ecx, (%eax) +; X32-NEXT: addl $64, %esp +; X32-NEXT: retl %init = load <32 x i8>, ptr %src, align 1 %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 %intermediate.val.frozen = freeze <32 x i8> %init @@ -3836,897 +1190,66 @@ } define void @load_8byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: 
movq %xmm2, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r10, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %r11d, %r11d -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rax, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rax, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r11, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %eax -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r10, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r11, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %r9, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, (%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: load_8byte_chunk_of_32byte_alloca: +; X64: # %bb.0: +; X64-NEXT: movdqu (%rdi), %xmm0 +; X64-NEXT: movdqu 16(%rdi), %xmm1 +; X64-NEXT: shll $3, %esi +; X64-NEXT: 
pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] +; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm3, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm2, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: shrb $3, %sil +; X64-NEXT: movzbl %sil, %eax +; X64-NEXT: movq -64(%rsp,%rax), %rax +; X64-NEXT: movq %rax, (%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %r11d, %r11d -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r11, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rax, %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, 
%cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %sil, %sil -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r8, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %sil, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r9, %rdi, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %r11d, %r11d -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r10, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %r10d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r8,%r8), %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r10, %rbx, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r9, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %r8, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r10, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r11, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %r9d -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %rdi, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r9d, %r10d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rcx,%rcx), %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r10, %r11, %r10 -; 
X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rdi, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %rcx, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r10, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %r8, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rcx, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r9, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rsi, %r9, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %sil, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %r9, %rdi, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r11d, %r11d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %rcx, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %r8, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r8 -; 
X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %sil, %sil -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %r11, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $64, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm2 -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm2[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: subb %bl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-128, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 
1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: negb %dl -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebp -; 
X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: addl %ebx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: addb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: 
shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ebx,%ebx), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-128, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), 
%ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ecx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnsl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 4(%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $64, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $32, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 
{{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm1 -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%ecx), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: 
testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-128, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm3, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: 
cmovbl %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %al, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl (%esp), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, 4(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $32, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $64, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %dl, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebp -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $bl killed $bl killed $ebx def $ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-128, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl (%esp), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %bl, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael 
%eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnsl (%esp), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 4(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $64, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $32, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm1 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%ecx), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm3, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm1[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm3, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, (%esp), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %ebx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 
%eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-128, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm3, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %al, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl (%esp), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte 
Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 4(%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $32, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_8byte_chunk_of_32byte_alloca: +; X32: # %bb.0: +; X32-NEXT: subl $64, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movdqu (%edx), %xmm0 +; X32-NEXT: movdqu 16(%edx), %xmm1 +; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm6 = xmm1[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm7 = xmm1[2,3,2,3] +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm7, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm6, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm5, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm4, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: 
movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl (%esp,%ecx), %edx +; X32-NEXT: movl 4(%esp,%ecx), %ecx +; X32-NEXT: movl %ecx, 4(%eax) +; X32-NEXT: movl %edx, (%eax) +; X32-NEXT: addl $64, %esp +; X32-NEXT: retl %init = load <32 x i8>, ptr %src, align 1 %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 %intermediate.val.frozen = freeze <32 x i8> %init @@ -4739,1522 +1262,76 @@ } define void @load_16byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %al -; X64-NO-BMI2-NO-SHLD-NEXT: subb %sil, %al -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r11, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbx, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbx, %r15 -; 
X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %ebx, %ebx -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r12, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r11, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r14, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r15, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r12, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %eax -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r8,%r8), %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r11, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r8, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %r14, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r9, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %r10, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, 8(%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r15, (%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: load_16byte_chunk_of_32byte_alloca: +; X64: # %bb.0: +; X64-NEXT: movdqu (%rdi), %xmm0 +; X64-NEXT: movdqu 16(%rdi), %xmm1 +; X64-NEXT: shll $3, %esi +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X64-NEXT: pshufd {{.*#+}} xmm3 = 
xmm1[2,3,2,3] +; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm3, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm2, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: shrb $3, %sil +; X64-NEXT: movzbl %sil, %eax +; X64-NEXT: movq -64(%rsp,%rax), %rcx +; X64-NEXT: movq -56(%rsp,%rax), %rax +; X64-NEXT: movq %rax, 8(%rdx) +; X64-NEXT: movq %rcx, (%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %r10d, %r10d -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r14, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r10, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %rax, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r15 -; 
X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r15, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r10, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r14, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %rbx, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rdi, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r10, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %sil, %sil -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r15, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r11, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, 8(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %sil, %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %rdi, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r8d, %r11d -; 
X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r11b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %r9, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %r14d -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r14, %r9, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrq %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r11, %r9, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r10, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %r11d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r11b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r11, %r12, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r10, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %r12d, %r12d -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r10, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r12, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r12, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r11, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r10, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r14d, %r8d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %r10, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r15, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r14, %rdi, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r14b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rdi, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r12, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %rbx, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rcx, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %r9, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r8, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, 8(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r12 -; 
X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r9, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %rax, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rcx, %rax, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r10d, %r10d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rbx, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r10, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rsi, %rdi, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r15, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r10, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r11, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %rbx, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r9, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %r9, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; 
X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r9, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r10, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %sil, %sil -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %r14, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %r15, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r9, 8(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $128, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm1 -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm1[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm3, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm3, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl 
%ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm3, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-128, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm3, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ebx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %edi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: negb %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %edi -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb 
%al, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %bl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-128, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %bl # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl 
%edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; 
X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebp 
-; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: negb %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 
4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %cl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnsl %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %cl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, (%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 12(%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, 4(%ecx) -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, 8(%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $128, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $120, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm1[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm4, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm4 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm4, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel 
%edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm4, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-128, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm3, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %al, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), 
%esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bh, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %bl # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 
%ebx, 4(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 12(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, 8(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $120, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $144, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm1 -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm4 = xmm1[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm4, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm4, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm4 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 
4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edi, %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-128, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm4, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm3, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %bl # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $bl killed $bl killed $ebx def $ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl 
%eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb 
$-128, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %bl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edi, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), 
%edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebp, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel 
{{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnsl %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, (%edx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, 12(%edx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 4(%edx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 8(%edx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $144, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $120, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm4 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm4, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm4, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm4, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %ecx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-128, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm3, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebp 
-; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel (%esp), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 
4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %ebx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %esi -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, 12(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 8(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $120, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_16byte_chunk_of_32byte_alloca: +; X32: # %bb.0: +; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi +; X32-NEXT: subl $64, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movdqu (%edx), %xmm0 +; X32-NEXT: movdqu 16(%edx), %xmm1 +; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm6 = xmm1[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm7 = xmm1[2,3,2,3] +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm7, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm6, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm5, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm4, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl (%esp,%ecx), %edx +; X32-NEXT: movl 4(%esp,%ecx), %esi +; X32-NEXT: movl 8(%esp,%ecx), %edi +; 
X32-NEXT: movl 12(%esp,%ecx), %ecx +; X32-NEXT: movl %ecx, 12(%eax) +; X32-NEXT: movl %edi, 8(%eax) +; X32-NEXT: movl %esi, 4(%eax) +; X32-NEXT: movl %edx, (%eax) +; X32-NEXT: addl $64, %esp +; X32-NEXT: popl %esi +; X32-NEXT: popl %edi +; X32-NEXT: retl %init = load <32 x i8>, ptr %src, align 1 %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 %intermediate.val.frozen = freeze <32 x i8> %init @@ -6269,9 +1346,7 @@ ; no @load_32byte_chunk_of_32byte_alloca ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; ALL: {{.*}} -; X32: {{.*}} ; X32-NO-SHLD: {{.*}} ; X32-SHLD: {{.*}} -; X64: {{.*}} ; X64-NO-SHLD: {{.*}} ; X64-SHLD: {{.*}}